source: main/trunk/greenstone2/perllib/util.pm@ 23362

Last change on this file since 23362 was 23362, checked in by davidb, 13 years ago

Additional routines (and few upgraded) to help support Greenstone working with filenames under Windows when then go beyond Latin-1 and start turning up in their DOS abbreviated form (e.g. Test~1.txt)

  • Property svn:keywords set to Author Date Id Revision
File size: 41.4 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33
34# removes files (but not directories)
35sub rm {
36 my (@files) = @_;
37
38 my @filefiles = ();
39
40 # make sure the files we want to delete exist
41 # and are regular files
42 foreach my $file (@files) {
43 if (!-e $file) {
44 print STDERR "util::rm $file does not exist\n";
45 } elsif ((!-f $file) && (!-l $file)) {
46 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47 } else {
48 push (@filefiles, $file);
49 }
50 }
51
52 # remove the files
53 my $numremoved = unlink @filefiles;
54
55 # check to make sure all of them were removed
56 if ($numremoved != scalar(@filefiles)) {
57 print STDERR "util::rm Not all files were removed\n";
58 }
59}
60
61# removes files (but not directories) - can rename this to the default
62# "rm" subroutine when debugging the deletion of individual files.
63sub rm_debug {
64 my (@files) = @_;
65 my @filefiles = ();
66
67 # make sure the files we want to delete exist
68 # and are regular files
69 foreach my $file (@files) {
70 if (!-e $file) {
71 print STDERR "util::rm $file does not exist\n";
72 } elsif ((!-f $file) && (!-l $file)) {
73 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
74 } else { # debug message
75 unlink($file) or warn "Could not delete file $file: $!\n";
76 }
77 }
78}
79
80
81# recursive removal
82sub filtered_rm_r {
83 my ($files,$file_accept_re,$file_reject_re) = @_;
84
85 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
86
87 # recursively remove the files
88 foreach my $file (@files_array) {
89 $file =~ s/[\/\\]+$//; # remove trailing slashes
90
91 if (!-e $file) {
92 print STDERR "util::filtered_rm_r $file does not exist\n";
93
94 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
95 # get the contents of this directory
96 if (!opendir (INDIR, $file)) {
97 print STDERR "util::filtered_rm_r could not open directory $file\n";
98 } else {
99 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
100 closedir (INDIR);
101
102 # remove all the files in this directory
103 map {$_="$file/$_";} @filedir;
104 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
105
106 if (!defined $file_accept_re && !defined $file_reject_re) {
107 # remove this directory
108 if (!rmdir $file) {
109 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
110 }
111 }
112 }
113 } else {
114 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
115
116 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
117 # remove this file
118 &rm ($file);
119 }
120 }
121 }
122}
123
124
125# recursive removal
126sub rm_r {
127 my (@files) = @_;
128
129 # use the more general (but reterospectively written function
130 # filtered_rm_r function()
131
132 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
133}
134
135
136
137
138# moves a file or a group of files
139sub mv {
140 my $dest = pop (@_);
141 my (@srcfiles) = @_;
142
143 # remove trailing slashes from source and destination files
144 $dest =~ s/[\\\/]+$//;
145 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
146
147 # a few sanity checks
148 if (scalar (@srcfiles) == 0) {
149 print STDERR "util::mv no destination directory given\n";
150 return;
151 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
152 print STDERR "util::mv if multiple source files are given the ".
153 "destination must be a directory\n";
154 return;
155 }
156
157 # move the files
158 foreach my $file (@srcfiles) {
159 my $tempdest = $dest;
160 if (-d $tempdest) {
161 my ($filename) = $file =~ /([^\\\/]+)$/;
162 $tempdest .= "/$filename";
163 }
164 if (!-e $file) {
165 print STDERR "util::mv $file does not exist\n";
166 } else {
167 rename ($file, $tempdest);
168 }
169 }
170}
171
172
173# copies a file or a group of files
174sub cp {
175 my $dest = pop (@_);
176 my (@srcfiles) = @_;
177
178 # remove trailing slashes from source and destination files
179 $dest =~ s/[\\\/]+$//;
180 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
181
182 # a few sanity checks
183 if (scalar (@srcfiles) == 0) {
184 print STDERR "util::cp no destination directory given\n";
185 return;
186 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
187 print STDERR "util::cp if multiple source files are given the ".
188 "destination must be a directory\n";
189 return;
190 }
191
192 # copy the files
193 foreach my $file (@srcfiles) {
194 my $tempdest = $dest;
195 if (-d $tempdest) {
196 my ($filename) = $file =~ /([^\\\/]+)$/;
197 $tempdest .= "/$filename";
198 }
199 if (!-e $file) {
200 print STDERR "util::cp $file does not exist\n";
201 } elsif (!-f $file) {
202 print STDERR "util::cp $file is not a plain file\n";
203 } else {
204 &File::Copy::copy ($file, $tempdest);
205 }
206 }
207}
208
209
210
211# recursively copies a file or group of files
212# syntax: cp_r (sourcefiles, destination directory)
213# destination must be a directory - to copy one file to
214# another use cp instead
215sub cp_r {
216 my $dest = pop (@_);
217 my (@srcfiles) = @_;
218
219 # a few sanity checks
220 if (scalar (@srcfiles) == 0) {
221 print STDERR "util::cp_r no destination directory given\n";
222 return;
223 } elsif (-f $dest) {
224 print STDERR "util::cp_r destination must be a directory\n";
225 return;
226 }
227
228 # create destination directory if it doesn't exist already
229 if (! -d $dest) {
230 my $store_umask = umask(0002);
231 mkdir ($dest, 0777);
232 umask($store_umask);
233 }
234
235 # copy the files
236 foreach my $file (@srcfiles) {
237
238 if (!-e $file) {
239 print STDERR "util::cp_r $file does not exist\n";
240
241 } elsif (-d $file) {
242 # make the new directory
243 my ($filename) = $file =~ /([^\\\/]*)$/;
244 $dest = &util::filename_cat ($dest, $filename);
245 my $store_umask = umask(0002);
246 mkdir ($dest, 0777);
247 umask($store_umask);
248
249 # get the contents of this directory
250 if (!opendir (INDIR, $file)) {
251 print STDERR "util::cp_r could not open directory $file\n";
252 } else {
253 my @filedir = readdir (INDIR);
254 closedir (INDIR);
255 foreach my $f (@filedir) {
256 next if $f =~ /^\.\.?$/;
257 # copy all the files in this directory
258 my $ff = &util::filename_cat ($file, $f);
259 &cp_r ($ff, $dest);
260 }
261 }
262
263 } else {
264 &cp($file, $dest);
265 }
266 }
267}
268# recursively copies a file or group of files
269# syntax: cp_r (sourcefiles, destination directory)
270# destination must be a directory - to copy one file to
271# another use cp instead
272sub cp_r_nosvn {
273 my $dest = pop (@_);
274 my (@srcfiles) = @_;
275
276 # a few sanity checks
277 if (scalar (@srcfiles) == 0) {
278 print STDERR "util::cp_r no destination directory given\n";
279 return;
280 } elsif (-f $dest) {
281 print STDERR "util::cp_r destination must be a directory\n";
282 return;
283 }
284
285 # create destination directory if it doesn't exist already
286 if (! -d $dest) {
287 my $store_umask = umask(0002);
288 mkdir ($dest, 0777);
289 umask($store_umask);
290 }
291
292 # copy the files
293 foreach my $file (@srcfiles) {
294
295 if (!-e $file) {
296 print STDERR "util::cp_r $file does not exist\n";
297
298 } elsif (-d $file) {
299 # make the new directory
300 my ($filename) = $file =~ /([^\\\/]*)$/;
301 $dest = &util::filename_cat ($dest, $filename);
302 my $store_umask = umask(0002);
303 mkdir ($dest, 0777);
304 umask($store_umask);
305
306 # get the contents of this directory
307 if (!opendir (INDIR, $file)) {
308 print STDERR "util::cp_r could not open directory $file\n";
309 } else {
310 my @filedir = readdir (INDIR);
311 closedir (INDIR);
312 foreach my $f (@filedir) {
313 next if $f =~ /^\.\.?$/;
314 next if $f =~ /^\.svn$/;
315 # copy all the files in this directory
316 my $ff = &util::filename_cat ($file, $f);
317 &cp_r ($ff, $dest);
318 }
319 }
320
321 } else {
322 &cp($file, $dest);
323 }
324 }
325}
326
327# copies a directory and its contents, excluding subdirectories, into a new directory
328sub cp_r_toplevel {
329 my $dest = pop (@_);
330 my (@srcfiles) = @_;
331
332 # a few sanity checks
333 if (scalar (@srcfiles) == 0) {
334 print STDERR "util::cp_r no destination directory given\n";
335 return;
336 } elsif (-f $dest) {
337 print STDERR "util::cp_r destination must be a directory\n";
338 return;
339 }
340
341 # create destination directory if it doesn't exist already
342 if (! -d $dest) {
343 my $store_umask = umask(0002);
344 mkdir ($dest, 0777);
345 umask($store_umask);
346 }
347
348 # copy the files
349 foreach my $file (@srcfiles) {
350
351 if (!-e $file) {
352 print STDERR "util::cp_r $file does not exist\n";
353
354 } elsif (-d $file) {
355 # make the new directory
356 my ($filename) = $file =~ /([^\\\/]*)$/;
357 $dest = &util::filename_cat ($dest, $filename);
358 my $store_umask = umask(0002);
359 mkdir ($dest, 0777);
360 umask($store_umask);
361
362 # get the contents of this directory
363 if (!opendir (INDIR, $file)) {
364 print STDERR "util::cp_r could not open directory $file\n";
365 } else {
366 my @filedir = readdir (INDIR);
367 closedir (INDIR);
368 foreach my $f (@filedir) {
369 next if $f =~ /^\.\.?$/;
370
371 # copy all the files in this directory, but not directories
372 my $ff = &util::filename_cat ($file, $f);
373 if (-f $ff) {
374 &cp($ff, $dest);
375 #&cp_r ($ff, $dest);
376 }
377 }
378 }
379
380 } else {
381 &cp($file, $dest);
382 }
383 }
384}
385
386sub mk_dir {
387 my ($dir) = @_;
388
389 my $store_umask = umask(0002);
390 my $mkdir_ok = mkdir ($dir, 0777);
391 umask($store_umask);
392
393 if (!$mkdir_ok)
394 {
395 print STDERR "util::mk_dir could not create directory $dir\n";
396 return;
397 }
398}
399
400# in case anyone cares - I did some testing (using perls Benchmark module)
401# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
402# slightly faster (surprisingly) - Stefan.
403sub mk_all_dir {
404 my ($dir) = @_;
405
406 # use / for the directory separator, remove duplicate and
407 # trailing slashes
408 $dir=~s/[\\\/]+/\//g;
409 $dir=~s/[\\\/]+$//;
410
411 # make sure the cache directory exists
412 my $dirsofar = "";
413 my $first = 1;
414 foreach my $dirname (split ("/", $dir)) {
415 $dirsofar .= "/" unless $first;
416 $first = 0;
417
418 $dirsofar .= $dirname;
419
420 next if $dirname =~ /^(|[a-z]:)$/i;
421 if (!-e $dirsofar)
422 {
423 my $store_umask = umask(0002);
424 my $mkdir_ok = mkdir ($dirsofar, 0777);
425 umask($store_umask);
426 if (!$mkdir_ok)
427 {
428 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
429 return;
430 }
431 }
432 }
433}
434
435# make hard link to file if supported by OS, otherwise copy the file
436sub hard_link {
437 my ($src, $dest, $verbosity) = @_;
438
439 # remove trailing slashes from source and destination files
440 $src =~ s/[\\\/]+$//;
441 $dest =~ s/[\\\/]+$//;
442
443## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
444 # a few sanity checks
445 if (-e $dest) {
446 # destination file already exists
447 return;
448 }
449 elsif (!-e $src) {
450 print STDERR "util::hard_link source file \"$src\" does not exist\n";
451 return 1;
452 }
453 elsif (-d $src) {
454 print STDERR "util::hard_link source \"$src\" is a directory\n";
455 return 1;
456 }
457
458 my $dest_dir = &File::Basename::dirname($dest);
459 mk_all_dir($dest_dir) if (!-e $dest_dir);
460
461
462 if (!link($src, $dest)) {
463 if ((!defined $verbosity) || ($verbosity>2)) {
464 print STDERR "util::hard_link: unable to create hard link. ";
465 print STDERR " Copying file: $src -> $dest\n";
466 }
467 &File::Copy::copy ($src, $dest);
468 }
469 return 0;
470}
471
472# make soft link to file if supported by OS, otherwise copy file
473sub soft_link {
474 my ($src, $dest, $ensure_paths_absolute) = @_;
475
476 # remove trailing slashes from source and destination files
477 $src =~ s/[\\\/]+$//;
478 $dest =~ s/[\\\/]+$//;
479
480 # Ensure file paths are absolute IF requested to do so
481 # Soft_linking didn't work for relative paths
482 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
483 # We need to ensure that the src file is the absolute path
484 # See http://perldoc.perl.org/File/Spec.html
485 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
486 $src = File::Spec->rel2abs($src); # make absolute
487 }
488 # Might as well ensure that the destination file's absolute path is used
489 if(!File::Spec->file_name_is_absolute( $dest )) {
490 $dest = File::Spec->rel2abs($dest); # make absolute
491 }
492 }
493
494 # a few sanity checks
495 if (!-e $src) {
496 print STDERR "util::soft_link source file $src does not exist\n";
497 return 0;
498 }
499
500 my $dest_dir = &File::Basename::dirname($dest);
501 mk_all_dir($dest_dir) if (!-e $dest_dir);
502
503 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
504 # symlink not supported on windows
505 &File::Copy::copy ($src, $dest);
506
507 } elsif (!eval {symlink($src, $dest)}) {
508 print STDERR "util::soft_link: unable to create soft link.\n";
509 return 0;
510 }
511
512 return 1;
513}
514
515# Primarily for filenames generated by processing
516# content of HTML files (which are mapped to UTF-8 internally)
517#
518# To turn this into an octet string that really exists on the file
519# system:
520# 1. don't need to do anything special for Unix-based systems
521# (as underlying file system is byte-code)
522# 2. need to map to short DOS filenames for Windows
523
524sub utf8_to_real_filename
525{
526 my ($utf8_filename) = @_;
527
528 my $real_filename;
529
530 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
531 require Win32;
532 my $unicode_filename = decode("utf8",$utf8_filename);
533 $real_filename = Win32::GetShortPathName($unicode_filename);
534 }
535 else {
536 $real_filename = $utf8_filename;
537 }
538
539 return $real_filename;
540}
541
542
543sub fd_exists
544{
545 my $filename_full_path = shift @_;
546 my $test_op = shift @_ || "-e";
547
548 # By default tests for existance of file or directory (-e)
549 # Can be made more specific by providing second parameter (e.g. -f or -d)
550
551 my $exists = 0;
552
553 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
554 require Win32;
555 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
556 if (!defined $filename_short_path) {
557 # Was probably still in UTF8 form (not what is needed on Windows)
558 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
559 if (defined $unicode_filename_full_path) {
560 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
561 }
562 }
563 $filename_full_path = $filename_short_path;
564 }
565
566 if (defined $filename_full_path) {
567 $exists = eval "($test_op \$filename_full_path)";
568 }
569
570 return $exists;
571}
572
573sub file_exists
574{
575 my ($filename_full_path) = @_;
576
577 return fd_exists($filename_full_path,"-f");
578}
579
580sub dir_exists
581{
582 my ($filename_full_path) = @_;
583
584 return fd_exists($filename_full_path,"-d");
585}
586
587
588
589# updates a copy of a directory in some other part of the filesystem
590# verbosity settings are: 0=low, 1=normal, 2=high
591# both $fromdir and $todir should be absolute paths
592sub cachedir {
593 my ($fromdir, $todir, $verbosity) = @_;
594 $verbosity = 1 unless defined $verbosity;
595
596 # use / for the directory separator, remove duplicate and
597 # trailing slashes
598 $fromdir=~s/[\\\/]+/\//g;
599 $fromdir=~s/[\\\/]+$//;
600 $todir=~s/[\\\/]+/\//g;
601 $todir=~s/[\\\/]+$//;
602
603 &mk_all_dir ($todir);
604
605 # get the directories in ascending order
606 if (!opendir (FROMDIR, $fromdir)) {
607 print STDERR "util::cachedir could not read directory $fromdir\n";
608 return;
609 }
610 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
611 closedir (FROMDIR);
612
613 if (!opendir (TODIR, $todir)) {
614 print STDERR "util::cacedir could not read directory $todir\n";
615 return;
616 }
617 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
618 closedir (TODIR);
619
620 my $fromi = 0;
621 my $toi = 0;
622
623 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
624# print "fromi: $fromi toi: $toi\n";
625
626 # see if we should delete a file/directory
627 # this should happen if the file/directory
628 # is not in the from list or if its a different
629 # size, or has an older timestamp
630 if ($toi < scalar(@todir)) {
631 if (($fromi >= scalar(@fromdir)) ||
632 ($todir[$toi] lt $fromdir[$fromi] ||
633 ($todir[$toi] eq $fromdir[$fromi] &&
634 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
635 $verbosity)))) {
636
637 # the files are different
638 &rm_r("$todir/$todir[$toi]");
639 splice(@todir, $toi, 1); # $toi stays the same
640
641 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
642 # the files are the same
643 # if it is a directory, check its contents
644 if (-d "$todir/$todir[$toi]") {
645 &cachedir ("$fromdir/$fromdir[$fromi]",
646 "$todir/$todir[$toi]", $verbosity);
647 }
648
649 $toi++;
650 $fromi++;
651 next;
652 }
653 }
654
655 # see if we should insert a file/directory
656 # we should insert a file/directory if there
657 # is no tofiles left or if the tofile does not exist
658 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
659 $todir[$toi] gt $fromdir[$fromi])) {
660 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
661 splice (@todir, $toi, 0, $fromdir[$fromi]);
662
663 $toi++;
664 $fromi++;
665 }
666 }
667}
668
669# this function returns -1 if either file is not found
670# assumes that $file1 and $file2 are absolute file names or
671# in the current directory
672# $file2 is allowed to be newer than $file1
673sub differentfiles {
674 my ($file1, $file2, $verbosity) = @_;
675 $verbosity = 1 unless defined $verbosity;
676
677 $file1 =~ s/\/+$//;
678 $file2 =~ s/\/+$//;
679
680 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
681 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
682
683 return -1 unless (-e $file1 && -e $file2);
684 if ($file1name ne $file2name) {
685 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
686 return 1;
687 }
688
689 my @file1stat = stat ($file1);
690 my @file2stat = stat ($file2);
691
692 if (-d $file1) {
693 if (! -d $file2) {
694 print STDERR "one file is a directory\n" if ($verbosity >= 2);
695 return 1;
696 }
697 return 0;
698 }
699
700 # both must be regular files
701 unless (-f $file1 && -f $file2) {
702 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
703 return 1;
704 }
705
706 # the size of the files must be the same
707 if ($file1stat[7] != $file2stat[7]) {
708 print STDERR "different sized files\n" if ($verbosity >= 2);
709 return 1;
710 }
711
712 # the second file cannot be older than the first
713 if ($file1stat[9] > $file2stat[9]) {
714 print STDERR "file is older\n" if ($verbosity >= 2);
715 return 1;
716 }
717
718 return 0;
719}
720
721
722sub get_tmp_filename
723{
724 my $file_ext = shift(@_) || undef;
725
726 my $opt_dot_file_ext = "";
727 if (defined $file_ext) {
728 if ($file_ext !~ m/\./) {
729 # no dot, so needs one added in at start
730 $opt_dot_file_ext = ".$file_ext"
731 }
732 else {
733 # allow for "extensions" such as _metadata.txt to be handled
734 # gracefully
735 $opt_dot_file_ext = $file_ext;
736 }
737 }
738
739 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
740 &mk_all_dir ($tmpdir) unless -e $tmpdir;
741
742 my $count = 1000;
743 my $rand = int(rand $count);
744 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
745
746 while (-e $full_tmp_filename) {
747 $rand = int(rand $count);
748 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
749 $count++;
750 }
751
752 return $full_tmp_filename;
753}
754
755sub get_timestamped_tmp_folder
756{
757
758 my $tmp_dirname;
759 if(defined $ENV{'GSDLCOLLECTDIR'}) {
760 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
761 } elsif(defined $ENV{'GSDLHOME'}) {
762 $tmp_dirname = $ENV{'GSDLHOME'};
763 } else {
764 return undef;
765 }
766
767 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
768 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
769
770 # add the timestamp into the path otherwise we can run into problems
771 # if documents have the same name
772 my $timestamp = time;
773 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
774 $tmp_dirname = $time_tmp_dirname;
775 my $i = 1;
776 while (-e $tmp_dirname) {
777 $tmp_dirname = "$time_tmp_dirname$i";
778 $i++;
779 }
780 &util::mk_dir($tmp_dirname);
781
782 return $tmp_dirname;
783}
784
785sub get_timestamped_tmp_filename_in_collection
786{
787
788 my ($input_filename, $output_ext) = @_;
789 # derive tmp filename from input filename
790 my ($tailname, $dirname, $suffix)
791 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
792
793 # softlink to collection tmp dir
794 my $tmp_dirname = &util::get_timestamped_tmp_folder();
795 $tmp_dirname = $dirname unless defined $tmp_dirname;
796
797 # following two steps copied from ConvertBinaryFile
798 # do we need them?? can't use them as is, as they use plugin methods.
799
800 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
801
802 # URLEncode this since htmls with images where the html filename is utf8 don't seem
803 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
804 # files on the filesystem.
805 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
806 if (defined $output_ext) {
807 $output_ext = ".$output_ext"; # add the dot
808 } else {
809 $output_ext = $suffix;
810 }
811 $output_ext= lc($output_ext);
812 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
813
814 return $tmp_filename;
815}
816
817sub get_toplevel_tmp_dir
818{
819 return filename_cat($ENV{'GSDLHOME'}, "tmp");
820}
821
822
823sub filename_to_regex {
824 my $filename = shift (@_);
825
826 # need to put single backslash back to double so that regex works
827 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
828 $filename =~ s/\\/\\\\/g;
829 }
830 return $filename;
831}
832
833sub filename_cat {
834 my $first_file = shift(@_);
835 my (@filenames) = @_;
836
837# Useful for debugging
838# -- might make sense to call caller(0) rather than (1)??
839# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
840# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
841
842 # If first_file is not null or empty, then add it back into the list
843 if (defined $first_file && $first_file =~ /\S/) {
844 unshift(@filenames, $first_file);
845 }
846
847 my $filename = join("/", @filenames);
848
849 # remove duplicate slashes and remove the last slash
850 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
851 $filename =~ s/[\\\/]+/\\/g;
852 } else {
853 $filename =~ s/[\/]+/\//g;
854 # DB: want a filename abc\de.html to remain like this
855 }
856 $filename =~ s/[\\\/]$//;
857
858 return $filename;
859}
860
861
862sub pathname_cat {
863 my $first_path = shift(@_);
864 my (@pathnames) = @_;
865
866 # If first_path is not null or empty, then add it back into the list
867 if (defined $first_path && $first_path =~ /\S/) {
868 unshift(@pathnames, $first_path);
869 }
870
871 my $join_char;
872 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
873 $join_char = ";";
874 } else {
875 $join_char = ":";
876 }
877
878 my $pathname = join($join_char, @pathnames);
879
880 # remove duplicate slashes
881 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
882 $pathname =~ s/[\\\/]+/\\/g;
883 } else {
884 $pathname =~ s/[\/]+/\//g;
885 # DB: want a pathname abc\de.html to remain like this
886 }
887
888 return $pathname;
889}
890
891
892sub tidy_up_oid {
893 my ($OID) = @_;
894 if ($OID =~ /\./) {
895 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
896 $OID =~ s/\.//g; #remove any periods
897 }
898 if ($OID =~ /^\s.*\s$/) {
899 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
900 # remove starting and trailing whitespace
901 $OID =~ s/^\s+//;
902 $OID =~ s/\s+$//;
903 }
904 if ($OID =~ /^[\d]*$/) {
905 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
906 $OID = "D" . $OID;
907 }
908
909 return $OID;
910}
911sub envvar_prepend {
912 my ($var,$val) = @_;
913
914 # do not prepend any value/path that's already in the environment variable
915 if ($ENV{'GSDLOS'} =~ /^windows$/i)
916 {
917 my $escaped_val = $val;
918 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
919 if (!defined($ENV{$var})) {
920 $ENV{$var} = "$val";
921 }
922 elsif($ENV{$var} !~ m/$escaped_val/) {
923 $ENV{$var} = "$val;".$ENV{$var};
924 }
925 }
926 else {
927 if (!defined($ENV{$var})) {
928 $ENV{$var} = "$val";
929 }
930 elsif($ENV{$var} !~ m/$val/) {
931 $ENV{$var} = "$val:".$ENV{$var};
932 }
933 }
934}
935
936sub envvar_append {
937 my ($var,$val) = @_;
938
939 # do not append any value/path that's already in the environment variable
940 if ($ENV{'GSDLOS'} =~ /^windows$/i)
941 {
942 my $escaped_val = $val;
943 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
944 if (!defined($ENV{$var})) {
945 $ENV{$var} = "$val";
946 }
947 elsif($ENV{$var} !~ m/$escaped_val/) {
948 $ENV{$var} .= ";$val";
949 }
950 }
951 else {
952 if (!defined($ENV{$var})) {
953 $ENV{$var} = "$val";
954 }
955 elsif($ENV{$var} !~ m/$val/) {
956 $ENV{$var} .= ":$val";
957 }
958 }
959}
960
961
962# splits a filename into a prefix and a tail extension using the tail_re, or
963# if that fails, splits on the file_extension . (dot)
964sub get_prefix_and_tail_by_regex {
965
966 my ($filename,$tail_re) = @_;
967
968 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
969 if ((!defined $file_prefix) || (!defined $file_ext)) {
970 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
971 }
972
973 return ($file_prefix,$file_ext);
974}
975
976# get full path and file only path from a base_dir (which may be empty) and
977# file (which may contain directories)
978sub get_full_filenames {
979 my ($base_dir, $file) = @_;
980
981 my $filename_full_path = $file;
982 # add on directory if present
983 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
984
985 my $filename_no_path = $file;
986
987 # remove directory if present
988 $filename_no_path =~ s/^.*[\/\\]//;
989 return ($filename_full_path, $filename_no_path);
990}
991
992# returns the path of a file without the filename -- ie. the directory the file is in
993sub filename_head {
994 my $filename = shift(@_);
995
996 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
997 $filename =~ s/[^\\\\]*$//;
998 }
999 else {
1000 $filename =~ s/[^\\\/]*$//;
1001 }
1002
1003 return $filename;
1004}
1005
1006
1007
1008# returns 1 if filename1 and filename2 point to the same
1009# file or directory
1010sub filenames_equal {
1011 my ($filename1, $filename2) = @_;
1012
1013 # use filename_cat to clean up trailing slashes and
1014 # multiple slashes
1015 $filename1 = filename_cat ($filename1);
1016 $filename2 = filename_cat ($filename2);
1017
1018 # filenames not case sensitive on windows
1019 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1020 $filename1 =~ tr/[A-Z]/[a-z]/;
1021 $filename2 =~ tr/[A-Z]/[a-z]/;
1022 }
1023 return 1 if $filename1 eq $filename2;
1024 return 0;
1025}
1026
1027
1028sub filename_within_directory
1029{
1030 my ($filename,$within_dir) = @_;
1031
1032 my $dirsep = &util::get_dirsep();
1033 if ($within_dir !~ m/$dirsep$/) {
1034 $within_dir .= $dirsep;
1035 }
1036
1037 $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator
1038
1039 if ($filename =~ m/^$within_dir(.*)$/) {
1040 $filename = $1;
1041 }
1042
1043 return $filename;
1044}
1045
1046sub filename_within_collection
1047{
1048 my ($filename) = @_;
1049
1050 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1051
1052 if (defined $collect_dir) {
1053
1054 # if from within GSDLCOLLECTDIR, then remove directory prefix
1055 # so source_filename is realative to it. This is done to aid
1056 # portability, i.e. the collection can be moved to somewhere
1057 # else on the file system and the archives directory will still
1058 # work. This is needed, for example in the applet version of
1059 # GLI where GSDLHOME/collect on the server will be different to
1060 # the collect directory of the remove user. Of course,
1061 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1062 # it back into a full pathname.
1063
1064 $filename = filename_within_directory($filename,$collect_dir);
1065 }
1066
1067 return $filename;
1068}
1069
1070sub prettyprint_file
1071{
1072 my ($base_dir,$file) = @_;
1073
1074 my $filename_full_path = &util::filename_cat($base_dir,$file);
1075
1076 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1077 require Win32;
1078
1079 # For some reason base_dir in the form c:/a/b/c
1080 # This leads to confusion later on, so turn it back into
1081 # the more usual Windows form
1082 $base_dir =~ s/\//\\/g;
1083 my $long_base_dir = Win32::GetLongPathName($base_dir);
1084 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1085
1086 $file = filename_within_directory($long_full_path,$long_base_dir);
1087 }
1088
1089 return $file;
1090}
1091
1092
1093sub upgrade_if_dos_filename
1094{
1095 my ($filename_full_path) = @_;
1096
1097 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1098 require Win32;
1099 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1100 # to its long (Windows) version
1101 $filename_full_path = Win32::GetLongPathName($filename_full_path);
1102 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1103 $filename_full_path =~ s/^(.)/\l$1/;
1104 }
1105
1106 return $filename_full_path;
1107}
1108
1109
1110sub filename_is_absolute
1111{
1112 my ($filename) = @_;
1113
1114 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1115 return ($filename =~ m/^(\w:)?\\/);
1116 }
1117 else {
1118 return ($filename =~ m/^\//);
1119 }
1120}
1121
1122
1123## @method make_absolute()
1124#
1125# Ensure the given file path is absolute in respect to the given base path.
1126#
1127# @param $base_dir A string denoting the base path the given dir must be
1128# absolute to.
1129# @param $dir The directory to be made absolute as a string. Note that the
1130# dir may already be absolute, in which case it will remain
1131# unchanged.
1132# @return The now absolute form of the directory as a string.
1133#
1134# @author John Thompson, DL Consulting Ltd.
1135# @copy 2006 DL Consulting Ltd.
1136#
1137#used in buildcol.pl, doesn't work for all cases --kjdon
1138sub make_absolute {
1139
1140 my ($base_dir, $dir) = @_;
1141### print STDERR "dir = $dir\n";
1142 $dir =~ s/[\\\/]+/\//g;
1143 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1144 $dir =~ s|^/tmp_mnt||;
1145 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1146 $dir =~ s|/[.][.]?/|/|g;
1147 $dir =~ tr|/|/|s;
1148### print STDERR "dir = $dir\n";
1149
1150 return $dir;
1151}
1152## make_absolute() ##
1153
1154sub get_dirsep {
1155
1156 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1157 return "\\";
1158 } else {
1159 return "\/";
1160 }
1161}
1162
1163sub get_os_dirsep {
1164
1165 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1166 return "\\\\";
1167 } else {
1168 return "\\\/";
1169 }
1170}
1171
1172sub get_re_dirsep {
1173
1174 return "\\\\|\\\/";
1175}
1176
1177
1178sub get_dirsep_tail {
1179 my ($filename) = @_;
1180
1181 # returns last part of directory or filename
1182 # On unix e.g. a/b.d => b.d
1183 # a/b/c => c
1184
1185 my $dirsep = get_re_dirsep();
1186 my @dirs = split (/$dirsep/, $filename);
1187 my $tail = pop @dirs;
1188
1189 # - caused problems under windows
1190 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1191
1192 return $tail;
1193}
1194
1195
1196# if this is running on windows we want binaries to end in
1197# .exe, otherwise they don't have to end in any extension
1198sub get_os_exe {
1199 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1200 return "";
1201}
1202
1203
1204# test to see whether this is a big or little endian machine
1205sub is_little_endian
1206{
1207 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1208 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1209 # Otherwise, it's little endian
1210
1211 #return 0 if $^O =~ /^darwin$/i;
1212 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1213
1214 # Going back to stating exactly whether the machine is little endian
1215 # or big endian, without any special case for Macs. Since for rata it comes
1216 # back with little endian and for shuttle with bigendian.
1217 return (ord(substr(pack("s",1), 0, 1)) == 1);
1218}
1219
1220
1221# will return the collection name if successful, "" otherwise
1222sub use_collection {
1223 my ($collection, $collectdir) = @_;
1224
1225 if (!defined $collectdir || $collectdir eq "") {
1226 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1227 }
1228
1229 # get and check the collection
1230 if (!defined($collection) || $collection eq "") {
1231 if (defined $ENV{'GSDLCOLLECTION'}) {
1232 $collection = $ENV{'GSDLCOLLECTION'};
1233 } else {
1234 print STDOUT "No collection specified\n";
1235 return "";
1236 }
1237 }
1238
1239 if ($collection eq "modelcol") {
1240 print STDOUT "You can't use modelcol.\n";
1241 return "";
1242 }
1243
1244 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1245 # are defined
1246 $ENV{'GSDLCOLLECTION'} = $collection;
1247 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1248
1249 # make sure this collection exists
1250 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1251 print STDOUT "Invalid collection ($collection).\n";
1252 return "";
1253 }
1254
1255 # everything is ready to go
1256 return $collection;
1257}
1258
1259sub get_current_collection_name {
1260 return $ENV{'GSDLCOLLECTION'};
1261}
1262
1263
1264# will return the collection name if successful, "" otherwise.
1265# Like use_collection (above) but for greenstone 3 (taking account of site level)
1266
1267sub use_site_collection {
1268 my ($site, $collection, $collectdir) = @_;
1269
1270 if (!defined $collectdir || $collectdir eq "") {
1271 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1272 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1273 }
1274
1275 # collectdir explicitly set by this point (using $site variable if required).
1276 # Can call "old" gsdl2 use_collection now.
1277
1278 return use_collection($collection,$collectdir);
1279}
1280
1281
1282
1283sub locate_config_file
1284{
1285 my ($file) = @_;
1286
1287 my $locations = locate_config_files($file);
1288
1289 return shift @$locations; # returns undef if 'locations' is empty
1290}
1291
1292
1293sub locate_config_files
1294{
1295 my ($file) = @_;
1296
1297 my @locations = ();
1298
1299 if (-e $file) {
1300 # Clearly specified (most likely full filename)
1301 # No need to hunt in 'etc' directories, return value unchanged
1302 push(@locations,$file);
1303 }
1304 else {
1305 # Check for collection specific one before looking in global GSDL 'etc'
1306 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1307 my $test_collect_etc_filename
1308 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1309
1310 if (-e $test_collect_etc_filename) {
1311 push(@locations,$test_collect_etc_filename);
1312 }
1313 }
1314 my $test_main_etc_filename
1315 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1316 if (-e $test_main_etc_filename) {
1317 push(@locations,$test_main_etc_filename);
1318 }
1319 }
1320
1321 return \@locations;
1322}
1323
1324
1325sub hyperlink_text
1326{
1327 my ($text) = @_;
1328
1329 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1330 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1331
1332 return $text;
1333}
1334
1335
1336# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1337# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1338sub is_dir_empty
1339{
1340 my ($path) = @_;
1341 opendir DIR, $path;
1342 while(my $entry = readdir DIR) {
1343 next if($entry =~ /^\.\.?$/);
1344 closedir DIR;
1345 return 0;
1346 }
1347 closedir DIR;
1348 return 1;
1349}
1350
1351# Returns the given filename converted using either URL encoding or base64
1352# encoding, as specified by $rename_method. If the given filename has no suffix
1353# (if it is just the tailname), then $no_suffix should be some defined value.
1354# rename_method can be url, none, base64
1355sub rename_file {
1356 my ($filename, $rename_method, $no_suffix) = @_;
1357
1358 if(!$filename) { # undefined or empty string
1359 return $filename;
1360 }
1361
1362 if (!$rename_method) {
1363 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1364 # Debugging information
1365 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1366 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1367 $rename_method = "url";
1368 } elsif($rename_method eq "none") {
1369 return $filename; # would have already been renamed
1370 }
1371
1372 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1373 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1374 ###$filename =~ s/ /_/g;
1375
1376 my ($tailname,$dirname,$suffix);
1377 if($no_suffix) { # given a tailname, no suffix
1378 ($tailname,$dirname) = File::Basename::fileparse($filename);
1379 }
1380 else {
1381 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1382 }
1383 $suffix = "" if !$suffix;
1384
1385 if ($rename_method eq "url") {
1386 $tailname = &unicode::url_encode($tailname);
1387 }
1388 elsif ($rename_method eq "base64") {
1389 $tailname = &unicode::base64_encode($tailname);
1390 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1391 }
1392
1393 $filename = "$tailname$suffix";
1394 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1395
1396 return $filename;
1397}
1398
1399
1400# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1401sub rename_ldb_or_bdb_file {
1402 my ($filename_no_ext) = @_;
1403
1404 my $new_filename = "$filename_no_ext.gdb";
1405 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1406 # try ldb
1407 my $old_filename = "$filename_no_ext.ldb";
1408
1409 if (-f $old_filename) {
1410 print STDERR "Renaming $old_filename to $new_filename\n";
1411 rename ($old_filename, $new_filename)
1412 || print STDERR "Rename failed: $!\n";
1413 return;
1414 }
1415 # try bdb
1416 $old_filename = "$filename_no_ext.bdb";
1417 if (-f $old_filename) {
1418 print STDERR "Renaming $old_filename to $new_filename\n";
1419 rename ($old_filename, $new_filename)
1420 || print STDERR "Rename failed: $!\n";
1421 return;
1422 }
1423}
1424
1425
1426# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1427# By default, /greenstone3 for GS3 or /greenstone for GS2.
1428sub get_greenstone_url_prefix() {
1429 # if already set on a previous occasion, just return that
1430 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1431 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1432
1433 my ($configfile, $urlprefix, $defaultUrlprefix);
1434 my @propertynames = ();
1435
1436 if($ENV{'GSDL3SRCHOME'}) {
1437 $defaultUrlprefix = "/greenstone3";
1438 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1439 push(@propertynames, qw/path\s*\=/);
1440 } else {
1441 $defaultUrlprefix = "/greenstone";
1442 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1443 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1444 }
1445
1446 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1447
1448 if(!$urlprefix) { # no values found for URL prefix, use default values
1449 $urlprefix = $defaultUrlprefix;
1450 } else {
1451 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1452 $urlprefix =~ s/^\///; # remove the starting slash
1453 my @dirs = split(/(\\|\/)/, $urlprefix);
1454 $urlprefix = shift(@dirs);
1455
1456 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1457 $urlprefix = "/$urlprefix";
1458 }
1459 }
1460
1461 # set for the future
1462 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1463# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1464 return $urlprefix;
1465}
1466
1467
1468# Given a config file (xml or java properties file) and a list/array of regular expressions
1469# that represent property names to match on, this function will return the value for the 1st
1470# matching property name. If the return value is undefined, no matching property was found.
1471sub extract_propvalue_from_file() {
1472 my ($configfile, $propertynames) = @_;
1473
1474 my $value;
1475 unless(open(FIN, "<$configfile")) {
1476 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1477 return $value; # not initialised
1478 }
1479
1480 # Read the entire file at once, as one single line, then close it
1481 my $filecontents;
1482 {
1483 local $/ = undef;
1484 $filecontents = <FIN>;
1485 }
1486 close(FIN);
1487
1488 foreach my $regex (@$propertynames) {
1489 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1490 if($value) {
1491 $value =~ s/^\"//; # remove any startquotes
1492 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1493 last; # found value for a matching property, break from loop
1494 }
1495 }
1496
1497 return $value;
1498}
1499
1500# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1501# given that perllib is in @INC in order to invoke this subroutine.
1502# Call as follows -- after setting up INC to include perllib and
1503# after setting up GSDLHOME and GSDLOS:
1504#
1505# require util;
1506# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1507#
1508sub setup_greenstone_env() {
1509 my ($GSDLHOME, $GSDLOS) = @_;
1510
1511 #my %env_map = ();
1512 # Get the localised ENV settings of running a localised source setup.bash
1513 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1514 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1515 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
1516 if($GSDLOS =~ m/windows/i) {
1517 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1518 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1519 }
1520 if (!open(PIN, "$perl_command |")) {
1521 print STDERR ("Unable to execute command: $perl_command. $!\n");
1522 }
1523
1524 while (defined (my $perl_output_line = <PIN>)) {
1525 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1526 #$env_map{$key}=$value;
1527 $ENV{$key}=$value;
1528 }
1529
1530 # If any keys in $ENV don't occur in Greenstone's localised env
1531 # (stored in $env_map), delete those entries from $ENV
1532 #foreach $key (keys %ENV) {
1533 # if(!defined $env_map{$key}) {
1534 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1535 # delete $ENV{$key}; # del $ENV(key, value) pair
1536 # }
1537 #}
1538 #undef %env_map;
1539}
1540
15411;
Note: See TracBrowser for help on using the repository browser.