source: main/trunk/greenstone2/perllib/util.pm@ 23416

Last change on this file since 23416 was 23416, checked in by davidb, 13 years ago

More careful handling of filenames going into 'block' hash. On Windows want to make sure we are consistently dealing with either C: or c: filenames

  • Property svn:keywords set to Author Date Id Revision
File size: 42.2 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33
34# removes files (but not directories)
35sub rm {
36 my (@files) = @_;
37
38 my @filefiles = ();
39
40 # make sure the files we want to delete exist
41 # and are regular files
42 foreach my $file (@files) {
43 if (!-e $file) {
44 print STDERR "util::rm $file does not exist\n";
45 } elsif ((!-f $file) && (!-l $file)) {
46 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47 } else {
48 push (@filefiles, $file);
49 }
50 }
51
52 # remove the files
53 my $numremoved = unlink @filefiles;
54
55 # check to make sure all of them were removed
56 if ($numremoved != scalar(@filefiles)) {
57 print STDERR "util::rm Not all files were removed\n";
58 }
59}
60
61# removes files (but not directories) - can rename this to the default
62# "rm" subroutine when debugging the deletion of individual files.
63sub rm_debug {
64 my (@files) = @_;
65 my @filefiles = ();
66
67 # make sure the files we want to delete exist
68 # and are regular files
69 foreach my $file (@files) {
70 if (!-e $file) {
71 print STDERR "util::rm $file does not exist\n";
72 } elsif ((!-f $file) && (!-l $file)) {
73 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
74 } else { # debug message
75 unlink($file) or warn "Could not delete file $file: $!\n";
76 }
77 }
78}
79
80
81# recursive removal
82sub filtered_rm_r {
83 my ($files,$file_accept_re,$file_reject_re) = @_;
84
85 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
86
87 # recursively remove the files
88 foreach my $file (@files_array) {
89 $file =~ s/[\/\\]+$//; # remove trailing slashes
90
91 if (!-e $file) {
92 print STDERR "util::filtered_rm_r $file does not exist\n";
93
94 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
95 # get the contents of this directory
96 if (!opendir (INDIR, $file)) {
97 print STDERR "util::filtered_rm_r could not open directory $file\n";
98 } else {
99 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
100 closedir (INDIR);
101
102 # remove all the files in this directory
103 map {$_="$file/$_";} @filedir;
104 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
105
106 if (!defined $file_accept_re && !defined $file_reject_re) {
107 # remove this directory
108 if (!rmdir $file) {
109 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
110 }
111 }
112 }
113 } else {
114 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
115
116 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
117 # remove this file
118 &rm ($file);
119 }
120 }
121 }
122}
123
124
125# recursive removal
126sub rm_r {
127 my (@files) = @_;
128
129 # use the more general (but reterospectively written function
130 # filtered_rm_r function()
131
132 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
133}
134
135
136
137
138# moves a file or a group of files
139sub mv {
140 my $dest = pop (@_);
141 my (@srcfiles) = @_;
142
143 # remove trailing slashes from source and destination files
144 $dest =~ s/[\\\/]+$//;
145 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
146
147 # a few sanity checks
148 if (scalar (@srcfiles) == 0) {
149 print STDERR "util::mv no destination directory given\n";
150 return;
151 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
152 print STDERR "util::mv if multiple source files are given the ".
153 "destination must be a directory\n";
154 return;
155 }
156
157 # move the files
158 foreach my $file (@srcfiles) {
159 my $tempdest = $dest;
160 if (-d $tempdest) {
161 my ($filename) = $file =~ /([^\\\/]+)$/;
162 $tempdest .= "/$filename";
163 }
164 if (!-e $file) {
165 print STDERR "util::mv $file does not exist\n";
166 } else {
167 rename ($file, $tempdest);
168 }
169 }
170}
171
172
173# copies a file or a group of files
174sub cp {
175 my $dest = pop (@_);
176 my (@srcfiles) = @_;
177
178 # remove trailing slashes from source and destination files
179 $dest =~ s/[\\\/]+$//;
180 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
181
182 # a few sanity checks
183 if (scalar (@srcfiles) == 0) {
184 print STDERR "util::cp no destination directory given\n";
185 return;
186 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
187 print STDERR "util::cp if multiple source files are given the ".
188 "destination must be a directory\n";
189 return;
190 }
191
192 # copy the files
193 foreach my $file (@srcfiles) {
194 my $tempdest = $dest;
195 if (-d $tempdest) {
196 my ($filename) = $file =~ /([^\\\/]+)$/;
197 $tempdest .= "/$filename";
198 }
199 if (!-e $file) {
200 print STDERR "util::cp $file does not exist\n";
201 } elsif (!-f $file) {
202 print STDERR "util::cp $file is not a plain file\n";
203 } else {
204 &File::Copy::copy ($file, $tempdest);
205 }
206 }
207}
208
209
210
211# recursively copies a file or group of files
212# syntax: cp_r (sourcefiles, destination directory)
213# destination must be a directory - to copy one file to
214# another use cp instead
215sub cp_r {
216 my $dest = pop (@_);
217 my (@srcfiles) = @_;
218
219 # a few sanity checks
220 if (scalar (@srcfiles) == 0) {
221 print STDERR "util::cp_r no destination directory given\n";
222 return;
223 } elsif (-f $dest) {
224 print STDERR "util::cp_r destination must be a directory\n";
225 return;
226 }
227
228 # create destination directory if it doesn't exist already
229 if (! -d $dest) {
230 my $store_umask = umask(0002);
231 mkdir ($dest, 0777);
232 umask($store_umask);
233 }
234
235 # copy the files
236 foreach my $file (@srcfiles) {
237
238 if (!-e $file) {
239 print STDERR "util::cp_r $file does not exist\n";
240
241 } elsif (-d $file) {
242 # make the new directory
243 my ($filename) = $file =~ /([^\\\/]*)$/;
244 $dest = &util::filename_cat ($dest, $filename);
245 my $store_umask = umask(0002);
246 mkdir ($dest, 0777);
247 umask($store_umask);
248
249 # get the contents of this directory
250 if (!opendir (INDIR, $file)) {
251 print STDERR "util::cp_r could not open directory $file\n";
252 } else {
253 my @filedir = readdir (INDIR);
254 closedir (INDIR);
255 foreach my $f (@filedir) {
256 next if $f =~ /^\.\.?$/;
257 # copy all the files in this directory
258 my $ff = &util::filename_cat ($file, $f);
259 &cp_r ($ff, $dest);
260 }
261 }
262
263 } else {
264 &cp($file, $dest);
265 }
266 }
267}
268# recursively copies a file or group of files
269# syntax: cp_r (sourcefiles, destination directory)
270# destination must be a directory - to copy one file to
271# another use cp instead
272sub cp_r_nosvn {
273 my $dest = pop (@_);
274 my (@srcfiles) = @_;
275
276 # a few sanity checks
277 if (scalar (@srcfiles) == 0) {
278 print STDERR "util::cp_r no destination directory given\n";
279 return;
280 } elsif (-f $dest) {
281 print STDERR "util::cp_r destination must be a directory\n";
282 return;
283 }
284
285 # create destination directory if it doesn't exist already
286 if (! -d $dest) {
287 my $store_umask = umask(0002);
288 mkdir ($dest, 0777);
289 umask($store_umask);
290 }
291
292 # copy the files
293 foreach my $file (@srcfiles) {
294
295 if (!-e $file) {
296 print STDERR "util::cp_r $file does not exist\n";
297
298 } elsif (-d $file) {
299 # make the new directory
300 my ($filename) = $file =~ /([^\\\/]*)$/;
301 $dest = &util::filename_cat ($dest, $filename);
302 my $store_umask = umask(0002);
303 mkdir ($dest, 0777);
304 umask($store_umask);
305
306 # get the contents of this directory
307 if (!opendir (INDIR, $file)) {
308 print STDERR "util::cp_r could not open directory $file\n";
309 } else {
310 my @filedir = readdir (INDIR);
311 closedir (INDIR);
312 foreach my $f (@filedir) {
313 next if $f =~ /^\.\.?$/;
314 next if $f =~ /^\.svn$/;
315 # copy all the files in this directory
316 my $ff = &util::filename_cat ($file, $f);
317 &cp_r ($ff, $dest);
318 }
319 }
320
321 } else {
322 &cp($file, $dest);
323 }
324 }
325}
326
327# copies a directory and its contents, excluding subdirectories, into a new directory
328sub cp_r_toplevel {
329 my $dest = pop (@_);
330 my (@srcfiles) = @_;
331
332 # a few sanity checks
333 if (scalar (@srcfiles) == 0) {
334 print STDERR "util::cp_r no destination directory given\n";
335 return;
336 } elsif (-f $dest) {
337 print STDERR "util::cp_r destination must be a directory\n";
338 return;
339 }
340
341 # create destination directory if it doesn't exist already
342 if (! -d $dest) {
343 my $store_umask = umask(0002);
344 mkdir ($dest, 0777);
345 umask($store_umask);
346 }
347
348 # copy the files
349 foreach my $file (@srcfiles) {
350
351 if (!-e $file) {
352 print STDERR "util::cp_r $file does not exist\n";
353
354 } elsif (-d $file) {
355 # make the new directory
356 my ($filename) = $file =~ /([^\\\/]*)$/;
357 $dest = &util::filename_cat ($dest, $filename);
358 my $store_umask = umask(0002);
359 mkdir ($dest, 0777);
360 umask($store_umask);
361
362 # get the contents of this directory
363 if (!opendir (INDIR, $file)) {
364 print STDERR "util::cp_r could not open directory $file\n";
365 } else {
366 my @filedir = readdir (INDIR);
367 closedir (INDIR);
368 foreach my $f (@filedir) {
369 next if $f =~ /^\.\.?$/;
370
371 # copy all the files in this directory, but not directories
372 my $ff = &util::filename_cat ($file, $f);
373 if (-f $ff) {
374 &cp($ff, $dest);
375 #&cp_r ($ff, $dest);
376 }
377 }
378 }
379
380 } else {
381 &cp($file, $dest);
382 }
383 }
384}
385
386sub mk_dir {
387 my ($dir) = @_;
388
389 my $store_umask = umask(0002);
390 my $mkdir_ok = mkdir ($dir, 0777);
391 umask($store_umask);
392
393 if (!$mkdir_ok)
394 {
395 print STDERR "util::mk_dir could not create directory $dir\n";
396 return;
397 }
398}
399
400# in case anyone cares - I did some testing (using perls Benchmark module)
401# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
402# slightly faster (surprisingly) - Stefan.
403sub mk_all_dir {
404 my ($dir) = @_;
405
406 # use / for the directory separator, remove duplicate and
407 # trailing slashes
408 $dir=~s/[\\\/]+/\//g;
409 $dir=~s/[\\\/]+$//;
410
411 # make sure the cache directory exists
412 my $dirsofar = "";
413 my $first = 1;
414 foreach my $dirname (split ("/", $dir)) {
415 $dirsofar .= "/" unless $first;
416 $first = 0;
417
418 $dirsofar .= $dirname;
419
420 next if $dirname =~ /^(|[a-z]:)$/i;
421 if (!-e $dirsofar)
422 {
423 my $store_umask = umask(0002);
424 my $mkdir_ok = mkdir ($dirsofar, 0777);
425 umask($store_umask);
426 if (!$mkdir_ok)
427 {
428 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
429 return;
430 }
431 }
432 }
433}
434
435# make hard link to file if supported by OS, otherwise copy the file
436sub hard_link {
437 my ($src, $dest, $verbosity) = @_;
438
439 # remove trailing slashes from source and destination files
440 $src =~ s/[\\\/]+$//;
441 $dest =~ s/[\\\/]+$//;
442
443## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
444 # a few sanity checks
445 if (-e $dest) {
446 # destination file already exists
447 return;
448 }
449 elsif (!-e $src) {
450 print STDERR "util::hard_link source file \"$src\" does not exist\n";
451 return 1;
452 }
453 elsif (-d $src) {
454 print STDERR "util::hard_link source \"$src\" is a directory\n";
455 return 1;
456 }
457
458 my $dest_dir = &File::Basename::dirname($dest);
459 mk_all_dir($dest_dir) if (!-e $dest_dir);
460
461
462 if (!link($src, $dest)) {
463 if ((!defined $verbosity) || ($verbosity>2)) {
464 print STDERR "util::hard_link: unable to create hard link. ";
465 print STDERR " Copying file: $src -> $dest\n";
466 }
467 &File::Copy::copy ($src, $dest);
468 }
469 return 0;
470}
471
472# make soft link to file if supported by OS, otherwise copy file
473sub soft_link {
474 my ($src, $dest, $ensure_paths_absolute) = @_;
475
476 # remove trailing slashes from source and destination files
477 $src =~ s/[\\\/]+$//;
478 $dest =~ s/[\\\/]+$//;
479
480 # Ensure file paths are absolute IF requested to do so
481 # Soft_linking didn't work for relative paths
482 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
483 # We need to ensure that the src file is the absolute path
484 # See http://perldoc.perl.org/File/Spec.html
485 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
486 $src = File::Spec->rel2abs($src); # make absolute
487 }
488 # Might as well ensure that the destination file's absolute path is used
489 if(!File::Spec->file_name_is_absolute( $dest )) {
490 $dest = File::Spec->rel2abs($dest); # make absolute
491 }
492 }
493
494 # a few sanity checks
495 if (!-e $src) {
496 print STDERR "util::soft_link source file $src does not exist\n";
497 return 0;
498 }
499
500 my $dest_dir = &File::Basename::dirname($dest);
501 mk_all_dir($dest_dir) if (!-e $dest_dir);
502
503 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
504 # symlink not supported on windows
505 &File::Copy::copy ($src, $dest);
506
507 } elsif (!eval {symlink($src, $dest)}) {
508 print STDERR "util::soft_link: unable to create soft link.\n";
509 return 0;
510 }
511
512 return 1;
513}
514
515# Primarily for filenames generated by processing
516# content of HTML files (which are mapped to UTF-8 internally)
517#
518# To turn this into an octet string that really exists on the file
519# system:
520# 1. don't need to do anything special for Unix-based systems
521# (as underlying file system is byte-code)
522# 2. need to map to short DOS filenames for Windows
523
524sub utf8_to_real_filename
525{
526 my ($utf8_filename) = @_;
527
528 my $real_filename;
529
530 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
531 require Win32;
532
533 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
534
535 my $unicode_filename = decode("utf8",$utf8_filename);
536 $real_filename = Win32::GetShortPathName($unicode_filename);
537 }
538 else {
539 $real_filename = $utf8_filename;
540 }
541
542 return $real_filename;
543}
544
545
546sub fd_exists
547{
548 my $filename_full_path = shift @_;
549 my $test_op = shift @_ || "-e";
550
551 # By default tests for existance of file or directory (-e)
552 # Can be made more specific by providing second parameter (e.g. -f or -d)
553
554 my $exists = 0;
555
556 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
557 require Win32;
558 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
559 if (!defined $filename_short_path) {
560 # Was probably still in UTF8 form (not what is needed on Windows)
561 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
562 if (defined $unicode_filename_full_path) {
563 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
564 }
565 }
566 $filename_full_path = $filename_short_path;
567 }
568
569 if (defined $filename_full_path) {
570 $exists = eval "($test_op \$filename_full_path)";
571 }
572
573 return $exists;
574}
575
576sub file_exists
577{
578 my ($filename_full_path) = @_;
579
580 return fd_exists($filename_full_path,"-f");
581}
582
583sub dir_exists
584{
585 my ($filename_full_path) = @_;
586
587 return fd_exists($filename_full_path,"-d");
588}
589
590
591
592# updates a copy of a directory in some other part of the filesystem
593# verbosity settings are: 0=low, 1=normal, 2=high
594# both $fromdir and $todir should be absolute paths
595sub cachedir {
596 my ($fromdir, $todir, $verbosity) = @_;
597 $verbosity = 1 unless defined $verbosity;
598
599 # use / for the directory separator, remove duplicate and
600 # trailing slashes
601 $fromdir=~s/[\\\/]+/\//g;
602 $fromdir=~s/[\\\/]+$//;
603 $todir=~s/[\\\/]+/\//g;
604 $todir=~s/[\\\/]+$//;
605
606 &mk_all_dir ($todir);
607
608 # get the directories in ascending order
609 if (!opendir (FROMDIR, $fromdir)) {
610 print STDERR "util::cachedir could not read directory $fromdir\n";
611 return;
612 }
613 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
614 closedir (FROMDIR);
615
616 if (!opendir (TODIR, $todir)) {
617 print STDERR "util::cacedir could not read directory $todir\n";
618 return;
619 }
620 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
621 closedir (TODIR);
622
623 my $fromi = 0;
624 my $toi = 0;
625
626 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
627# print "fromi: $fromi toi: $toi\n";
628
629 # see if we should delete a file/directory
630 # this should happen if the file/directory
631 # is not in the from list or if its a different
632 # size, or has an older timestamp
633 if ($toi < scalar(@todir)) {
634 if (($fromi >= scalar(@fromdir)) ||
635 ($todir[$toi] lt $fromdir[$fromi] ||
636 ($todir[$toi] eq $fromdir[$fromi] &&
637 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
638 $verbosity)))) {
639
640 # the files are different
641 &rm_r("$todir/$todir[$toi]");
642 splice(@todir, $toi, 1); # $toi stays the same
643
644 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
645 # the files are the same
646 # if it is a directory, check its contents
647 if (-d "$todir/$todir[$toi]") {
648 &cachedir ("$fromdir/$fromdir[$fromi]",
649 "$todir/$todir[$toi]", $verbosity);
650 }
651
652 $toi++;
653 $fromi++;
654 next;
655 }
656 }
657
658 # see if we should insert a file/directory
659 # we should insert a file/directory if there
660 # is no tofiles left or if the tofile does not exist
661 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
662 $todir[$toi] gt $fromdir[$fromi])) {
663 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
664 splice (@todir, $toi, 0, $fromdir[$fromi]);
665
666 $toi++;
667 $fromi++;
668 }
669 }
670}
671
672# this function returns -1 if either file is not found
673# assumes that $file1 and $file2 are absolute file names or
674# in the current directory
675# $file2 is allowed to be newer than $file1
676sub differentfiles {
677 my ($file1, $file2, $verbosity) = @_;
678 $verbosity = 1 unless defined $verbosity;
679
680 $file1 =~ s/\/+$//;
681 $file2 =~ s/\/+$//;
682
683 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
684 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
685
686 return -1 unless (-e $file1 && -e $file2);
687 if ($file1name ne $file2name) {
688 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
689 return 1;
690 }
691
692 my @file1stat = stat ($file1);
693 my @file2stat = stat ($file2);
694
695 if (-d $file1) {
696 if (! -d $file2) {
697 print STDERR "one file is a directory\n" if ($verbosity >= 2);
698 return 1;
699 }
700 return 0;
701 }
702
703 # both must be regular files
704 unless (-f $file1 && -f $file2) {
705 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
706 return 1;
707 }
708
709 # the size of the files must be the same
710 if ($file1stat[7] != $file2stat[7]) {
711 print STDERR "different sized files\n" if ($verbosity >= 2);
712 return 1;
713 }
714
715 # the second file cannot be older than the first
716 if ($file1stat[9] > $file2stat[9]) {
717 print STDERR "file is older\n" if ($verbosity >= 2);
718 return 1;
719 }
720
721 return 0;
722}
723
724
725sub get_tmp_filename
726{
727 my $file_ext = shift(@_) || undef;
728
729 my $opt_dot_file_ext = "";
730 if (defined $file_ext) {
731 if ($file_ext !~ m/\./) {
732 # no dot, so needs one added in at start
733 $opt_dot_file_ext = ".$file_ext"
734 }
735 else {
736 # allow for "extensions" such as _metadata.txt to be handled
737 # gracefully
738 $opt_dot_file_ext = $file_ext;
739 }
740 }
741
742 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
743 &mk_all_dir ($tmpdir) unless -e $tmpdir;
744
745 my $count = 1000;
746 my $rand = int(rand $count);
747 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
748
749 while (-e $full_tmp_filename) {
750 $rand = int(rand $count);
751 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
752 $count++;
753 }
754
755 return $full_tmp_filename;
756}
757
758sub get_timestamped_tmp_folder
759{
760
761 my $tmp_dirname;
762 if(defined $ENV{'GSDLCOLLECTDIR'}) {
763 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
764 } elsif(defined $ENV{'GSDLHOME'}) {
765 $tmp_dirname = $ENV{'GSDLHOME'};
766 } else {
767 return undef;
768 }
769
770 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
771 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
772
773 # add the timestamp into the path otherwise we can run into problems
774 # if documents have the same name
775 my $timestamp = time;
776 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
777 $tmp_dirname = $time_tmp_dirname;
778 my $i = 1;
779 while (-e $tmp_dirname) {
780 $tmp_dirname = "$time_tmp_dirname$i";
781 $i++;
782 }
783 &util::mk_dir($tmp_dirname);
784
785 return $tmp_dirname;
786}
787
788sub get_timestamped_tmp_filename_in_collection
789{
790
791 my ($input_filename, $output_ext) = @_;
792 # derive tmp filename from input filename
793 my ($tailname, $dirname, $suffix)
794 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
795
796 # softlink to collection tmp dir
797 my $tmp_dirname = &util::get_timestamped_tmp_folder();
798 $tmp_dirname = $dirname unless defined $tmp_dirname;
799
800 # following two steps copied from ConvertBinaryFile
801 # do we need them?? can't use them as is, as they use plugin methods.
802
803 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
804
805 # URLEncode this since htmls with images where the html filename is utf8 don't seem
806 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
807 # files on the filesystem.
808 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
809 if (defined $output_ext) {
810 $output_ext = ".$output_ext"; # add the dot
811 } else {
812 $output_ext = $suffix;
813 }
814 $output_ext= lc($output_ext);
815 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
816
817 return $tmp_filename;
818}
819
820sub get_toplevel_tmp_dir
821{
822 return filename_cat($ENV{'GSDLHOME'}, "tmp");
823}
824
825
826sub filename_to_regex {
827 my $filename = shift (@_);
828
829 # need to put single backslash back to double so that regex works
830 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
831 $filename =~ s/\\/\\\\/g;
832 }
833 return $filename;
834}
835
836sub filename_cat {
837 my $first_file = shift(@_);
838 my (@filenames) = @_;
839
840# Useful for debugging
841# -- might make sense to call caller(0) rather than (1)??
842# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
843# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
844
845 # If first_file is not null or empty, then add it back into the list
846 if (defined $first_file && $first_file =~ /\S/) {
847 unshift(@filenames, $first_file);
848 }
849
850 my $filename = join("/", @filenames);
851
852 # remove duplicate slashes and remove the last slash
853 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
854 $filename =~ s/[\\\/]+/\\/g;
855 } else {
856 $filename =~ s/[\/]+/\//g;
857 # DB: want a filename abc\de.html to remain like this
858 }
859 $filename =~ s/[\\\/]$//;
860
861 return $filename;
862}
863
864
865sub pathname_cat {
866 my $first_path = shift(@_);
867 my (@pathnames) = @_;
868
869 # If first_path is not null or empty, then add it back into the list
870 if (defined $first_path && $first_path =~ /\S/) {
871 unshift(@pathnames, $first_path);
872 }
873
874 my $join_char;
875 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
876 $join_char = ";";
877 } else {
878 $join_char = ":";
879 }
880
881 my $pathname = join($join_char, @pathnames);
882
883 # remove duplicate slashes
884 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
885 $pathname =~ s/[\\\/]+/\\/g;
886 } else {
887 $pathname =~ s/[\/]+/\//g;
888 # DB: want a pathname abc\de.html to remain like this
889 }
890
891 return $pathname;
892}
893
894
895sub tidy_up_oid {
896 my ($OID) = @_;
897 if ($OID =~ /\./) {
898 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
899 $OID =~ s/\.//g; #remove any periods
900 }
901 if ($OID =~ /^\s.*\s$/) {
902 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
903 # remove starting and trailing whitespace
904 $OID =~ s/^\s+//;
905 $OID =~ s/\s+$//;
906 }
907 if ($OID =~ /^[\d]*$/) {
908 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
909 $OID = "D" . $OID;
910 }
911
912 return $OID;
913}
914sub envvar_prepend {
915 my ($var,$val) = @_;
916
917 # do not prepend any value/path that's already in the environment variable
918 if ($ENV{'GSDLOS'} =~ /^windows$/i)
919 {
920 my $escaped_val = $val;
921 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
922 if (!defined($ENV{$var})) {
923 $ENV{$var} = "$val";
924 }
925 elsif($ENV{$var} !~ m/$escaped_val/) {
926 $ENV{$var} = "$val;".$ENV{$var};
927 }
928 }
929 else {
930 if (!defined($ENV{$var})) {
931 $ENV{$var} = "$val";
932 }
933 elsif($ENV{$var} !~ m/$val/) {
934 $ENV{$var} = "$val:".$ENV{$var};
935 }
936 }
937}
938
939sub envvar_append {
940 my ($var,$val) = @_;
941
942 # do not append any value/path that's already in the environment variable
943 if ($ENV{'GSDLOS'} =~ /^windows$/i)
944 {
945 my $escaped_val = $val;
946 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
947 if (!defined($ENV{$var})) {
948 $ENV{$var} = "$val";
949 }
950 elsif($ENV{$var} !~ m/$escaped_val/) {
951 $ENV{$var} .= ";$val";
952 }
953 }
954 else {
955 if (!defined($ENV{$var})) {
956 $ENV{$var} = "$val";
957 }
958 elsif($ENV{$var} !~ m/$val/) {
959 $ENV{$var} .= ":$val";
960 }
961 }
962}
963
964
965# splits a filename into a prefix and a tail extension using the tail_re, or
966# if that fails, splits on the file_extension . (dot)
967sub get_prefix_and_tail_by_regex {
968
969 my ($filename,$tail_re) = @_;
970
971 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
972 if ((!defined $file_prefix) || (!defined $file_ext)) {
973 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
974 }
975
976 return ($file_prefix,$file_ext);
977}
978
979# get full path and file only path from a base_dir (which may be empty) and
980# file (which may contain directories)
981sub get_full_filenames {
982 my ($base_dir, $file) = @_;
983
984 my $filename_full_path = $file;
985 # add on directory if present
986 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
987
988 my $filename_no_path = $file;
989
990 # remove directory if present
991 $filename_no_path =~ s/^.*[\/\\]//;
992 return ($filename_full_path, $filename_no_path);
993}
994
995# returns the path of a file without the filename -- ie. the directory the file is in
996sub filename_head {
997 my $filename = shift(@_);
998
999 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1000 $filename =~ s/[^\\\\]*$//;
1001 }
1002 else {
1003 $filename =~ s/[^\\\/]*$//;
1004 }
1005
1006 return $filename;
1007}
1008
1009
1010
1011# returns 1 if filename1 and filename2 point to the same
1012# file or directory
1013sub filenames_equal {
1014 my ($filename1, $filename2) = @_;
1015
1016 # use filename_cat to clean up trailing slashes and
1017 # multiple slashes
1018 $filename1 = filename_cat ($filename1);
1019 $filename2 = filename_cat ($filename2);
1020
1021 # filenames not case sensitive on windows
1022 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1023 $filename1 =~ tr/[A-Z]/[a-z]/;
1024 $filename2 =~ tr/[A-Z]/[a-z]/;
1025 }
1026 return 1 if $filename1 eq $filename2;
1027 return 0;
1028}
1029
1030
1031sub filename_within_directory
1032{
1033 my ($filename,$within_dir) = @_;
1034
1035 if ($within_dir !~ m/[\/\\]$/) {
1036 my $dirsep = &util::get_dirsep();
1037 $within_dir .= $dirsep;
1038 }
1039
1040 $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator
1041
1042 if ($filename =~ m/^$within_dir(.*)$/) {
1043 $filename = $1;
1044 }
1045
1046 return $filename;
1047}
1048
1049sub filename_within_collection
1050{
1051 my ($filename) = @_;
1052
1053 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1054
1055 if (defined $collect_dir) {
1056
1057 # if from within GSDLCOLLECTDIR, then remove directory prefix
1058 # so source_filename is realative to it. This is done to aid
1059 # portability, i.e. the collection can be moved to somewhere
1060 # else on the file system and the archives directory will still
1061 # work. This is needed, for example in the applet version of
1062 # GLI where GSDLHOME/collect on the server will be different to
1063 # the collect directory of the remove user. Of course,
1064 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1065 # it back into a full pathname.
1066
1067 $filename = filename_within_directory($filename,$collect_dir);
1068 }
1069
1070 return $filename;
1071}
1072
1073sub prettyprint_file
1074{
1075 my ($base_dir,$file) = @_;
1076
1077 my $filename_full_path = &util::filename_cat($base_dir,$file);
1078
1079 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1080 require Win32;
1081
1082 # For some reason base_dir in the form c:/a/b/c
1083 # This leads to confusion later on, so turn it back into
1084 # the more usual Windows form
1085 $base_dir =~ s/\//\\/g;
1086 my $long_base_dir = Win32::GetLongPathName($base_dir);
1087 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1088
1089 $file = filename_within_directory($long_full_path,$long_base_dir);
1090 }
1091
1092 return $file;
1093}
1094
1095
1096sub upgrade_if_dos_filename
1097{
1098 my ($filename_full_path,$and_encode) = @_;
1099
1100 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1101 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1102 # to its long (Windows) version
1103 my $long_filename = Win32::GetLongPathName($filename_full_path);
1104 if (defined $long_filename) {
1105 $filename_full_path = $long_filename;
1106 }
1107 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1108 $filename_full_path =~ s/^(.)/\l$1/;
1109 if ((defined $and_encode) && ($and_encode)) {
1110 $filename_full_path = encode("utf8",$filename_full_path);
1111 }
1112 }
1113
1114 return $filename_full_path;
1115}
1116
1117
1118sub downgrade_if_dos_filename
1119{
1120 my ($filename_full_path) = @_;
1121
1122 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1123 require Win32;
1124
1125 # Ensure the given long Windows filename is in a form that can
1126 # be opened by Perl => convert it to a short DOS-like filename
1127
1128 my $short_filename = Win32::GetShortPathName($filename_full_path);
1129 if (defined $short_filename) {
1130 $filename_full_path = $short_filename;
1131 }
1132 # Make sure initial drive letter is lower-case (to fit in
1133 # with rest of Greenstone)
1134 $filename_full_path =~ s/^(.)/\l$1/;
1135 }
1136
1137 return $filename_full_path;
1138}
1139
1140
1141sub filename_is_absolute
1142{
1143 my ($filename) = @_;
1144
1145 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1146 return ($filename =~ m/^(\w:)?\\/);
1147 }
1148 else {
1149 return ($filename =~ m/^\//);
1150 }
1151}
1152
1153
1154## @method make_absolute()
1155#
1156# Ensure the given file path is absolute in respect to the given base path.
1157#
1158# @param $base_dir A string denoting the base path the given dir must be
1159# absolute to.
1160# @param $dir The directory to be made absolute as a string. Note that the
1161# dir may already be absolute, in which case it will remain
1162# unchanged.
1163# @return The now absolute form of the directory as a string.
1164#
1165# @author John Thompson, DL Consulting Ltd.
1166# @copy 2006 DL Consulting Ltd.
1167#
1168#used in buildcol.pl, doesn't work for all cases --kjdon
1169sub make_absolute {
1170
1171 my ($base_dir, $dir) = @_;
1172### print STDERR "dir = $dir\n";
1173 $dir =~ s/[\\\/]+/\//g;
1174 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1175 $dir =~ s|^/tmp_mnt||;
1176 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1177 $dir =~ s|/[.][.]?/|/|g;
1178 $dir =~ tr|/|/|s;
1179### print STDERR "dir = $dir\n";
1180
1181 return $dir;
1182}
1183## make_absolute() ##
1184
1185sub get_dirsep {
1186
1187 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1188 return "\\";
1189 } else {
1190 return "\/";
1191 }
1192}
1193
1194sub get_os_dirsep {
1195
1196 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1197 return "\\\\";
1198 } else {
1199 return "\\\/";
1200 }
1201}
1202
1203sub get_re_dirsep {
1204
1205 return "\\\\|\\\/";
1206}
1207
1208
1209sub get_dirsep_tail {
1210 my ($filename) = @_;
1211
1212 # returns last part of directory or filename
1213 # On unix e.g. a/b.d => b.d
1214 # a/b/c => c
1215
1216 my $dirsep = get_re_dirsep();
1217 my @dirs = split (/$dirsep/, $filename);
1218 my $tail = pop @dirs;
1219
1220 # - caused problems under windows
1221 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1222
1223 return $tail;
1224}
1225
1226
1227# if this is running on windows we want binaries to end in
1228# .exe, otherwise they don't have to end in any extension
1229sub get_os_exe {
1230 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1231 return "";
1232}
1233
1234
1235# test to see whether this is a big or little endian machine
1236sub is_little_endian
1237{
1238 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1239 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1240 # Otherwise, it's little endian
1241
1242 #return 0 if $^O =~ /^darwin$/i;
1243 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1244
1245 # Going back to stating exactly whether the machine is little endian
1246 # or big endian, without any special case for Macs. Since for rata it comes
1247 # back with little endian and for shuttle with bigendian.
1248 return (ord(substr(pack("s",1), 0, 1)) == 1);
1249}
1250
1251
1252# will return the collection name if successful, "" otherwise
1253sub use_collection {
1254 my ($collection, $collectdir) = @_;
1255
1256 if (!defined $collectdir || $collectdir eq "") {
1257 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1258 }
1259
1260 # get and check the collection
1261 if (!defined($collection) || $collection eq "") {
1262 if (defined $ENV{'GSDLCOLLECTION'}) {
1263 $collection = $ENV{'GSDLCOLLECTION'};
1264 } else {
1265 print STDOUT "No collection specified\n";
1266 return "";
1267 }
1268 }
1269
1270 if ($collection eq "modelcol") {
1271 print STDOUT "You can't use modelcol.\n";
1272 return "";
1273 }
1274
1275 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1276 # are defined
1277 $ENV{'GSDLCOLLECTION'} = $collection;
1278 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1279
1280 # make sure this collection exists
1281 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1282 print STDOUT "Invalid collection ($collection).\n";
1283 return "";
1284 }
1285
1286 # everything is ready to go
1287 return $collection;
1288}
1289
1290sub get_current_collection_name {
1291 return $ENV{'GSDLCOLLECTION'};
1292}
1293
1294
1295# will return the collection name if successful, "" otherwise.
1296# Like use_collection (above) but for greenstone 3 (taking account of site level)
1297
1298sub use_site_collection {
1299 my ($site, $collection, $collectdir) = @_;
1300
1301 if (!defined $collectdir || $collectdir eq "") {
1302 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1303 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1304 }
1305
1306 # collectdir explicitly set by this point (using $site variable if required).
1307 # Can call "old" gsdl2 use_collection now.
1308
1309 return use_collection($collection,$collectdir);
1310}
1311
1312
1313
1314sub locate_config_file
1315{
1316 my ($file) = @_;
1317
1318 my $locations = locate_config_files($file);
1319
1320 return shift @$locations; # returns undef if 'locations' is empty
1321}
1322
1323
1324sub locate_config_files
1325{
1326 my ($file) = @_;
1327
1328 my @locations = ();
1329
1330 if (-e $file) {
1331 # Clearly specified (most likely full filename)
1332 # No need to hunt in 'etc' directories, return value unchanged
1333 push(@locations,$file);
1334 }
1335 else {
1336 # Check for collection specific one before looking in global GSDL 'etc'
1337 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1338 my $test_collect_etc_filename
1339 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1340
1341 if (-e $test_collect_etc_filename) {
1342 push(@locations,$test_collect_etc_filename);
1343 }
1344 }
1345 my $test_main_etc_filename
1346 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1347 if (-e $test_main_etc_filename) {
1348 push(@locations,$test_main_etc_filename);
1349 }
1350 }
1351
1352 return \@locations;
1353}
1354
1355
1356sub hyperlink_text
1357{
1358 my ($text) = @_;
1359
1360 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1361 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1362
1363 return $text;
1364}
1365
1366
1367# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1368# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1369sub is_dir_empty
1370{
1371 my ($path) = @_;
1372 opendir DIR, $path;
1373 while(my $entry = readdir DIR) {
1374 next if($entry =~ /^\.\.?$/);
1375 closedir DIR;
1376 return 0;
1377 }
1378 closedir DIR;
1379 return 1;
1380}
1381
1382# Returns the given filename converted using either URL encoding or base64
1383# encoding, as specified by $rename_method. If the given filename has no suffix
1384# (if it is just the tailname), then $no_suffix should be some defined value.
1385# rename_method can be url, none, base64
1386sub rename_file {
1387 my ($filename, $rename_method, $no_suffix) = @_;
1388
1389 if(!$filename) { # undefined or empty string
1390 return $filename;
1391 }
1392
1393 if (!$rename_method) {
1394 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1395 # Debugging information
1396 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1397 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1398 $rename_method = "url";
1399 } elsif($rename_method eq "none") {
1400 return $filename; # would have already been renamed
1401 }
1402
1403 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1404 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1405 ###$filename =~ s/ /_/g;
1406
1407 my ($tailname,$dirname,$suffix);
1408 if($no_suffix) { # given a tailname, no suffix
1409 ($tailname,$dirname) = File::Basename::fileparse($filename);
1410 }
1411 else {
1412 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1413 }
1414 if (!$suffix) {
1415 $suffix = "";
1416 }
1417 else {
1418 $suffix = lc($suffix);
1419 }
1420
1421 if ($rename_method eq "url") {
1422 $tailname = &unicode::url_encode($tailname);
1423 }
1424 elsif ($rename_method eq "base64") {
1425 $tailname = &unicode::base64_encode($tailname);
1426 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1427 }
1428
1429 $filename = "$tailname$suffix";
1430 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1431
1432 return $filename;
1433}
1434
1435
1436# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1437sub rename_ldb_or_bdb_file {
1438 my ($filename_no_ext) = @_;
1439
1440 my $new_filename = "$filename_no_ext.gdb";
1441 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1442 # try ldb
1443 my $old_filename = "$filename_no_ext.ldb";
1444
1445 if (-f $old_filename) {
1446 print STDERR "Renaming $old_filename to $new_filename\n";
1447 rename ($old_filename, $new_filename)
1448 || print STDERR "Rename failed: $!\n";
1449 return;
1450 }
1451 # try bdb
1452 $old_filename = "$filename_no_ext.bdb";
1453 if (-f $old_filename) {
1454 print STDERR "Renaming $old_filename to $new_filename\n";
1455 rename ($old_filename, $new_filename)
1456 || print STDERR "Rename failed: $!\n";
1457 return;
1458 }
1459}
1460
1461
1462# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1463# By default, /greenstone3 for GS3 or /greenstone for GS2.
1464sub get_greenstone_url_prefix() {
1465 # if already set on a previous occasion, just return that
1466 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1467 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1468
1469 my ($configfile, $urlprefix, $defaultUrlprefix);
1470 my @propertynames = ();
1471
1472 if($ENV{'GSDL3SRCHOME'}) {
1473 $defaultUrlprefix = "/greenstone3";
1474 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1475 push(@propertynames, qw/path\s*\=/);
1476 } else {
1477 $defaultUrlprefix = "/greenstone";
1478 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1479 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1480 }
1481
1482 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1483
1484 if(!$urlprefix) { # no values found for URL prefix, use default values
1485 $urlprefix = $defaultUrlprefix;
1486 } else {
1487 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1488 $urlprefix =~ s/^\///; # remove the starting slash
1489 my @dirs = split(/(\\|\/)/, $urlprefix);
1490 $urlprefix = shift(@dirs);
1491
1492 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1493 $urlprefix = "/$urlprefix";
1494 }
1495 }
1496
1497 # set for the future
1498 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1499# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1500 return $urlprefix;
1501}
1502
1503
1504# Given a config file (xml or java properties file) and a list/array of regular expressions
1505# that represent property names to match on, this function will return the value for the 1st
1506# matching property name. If the return value is undefined, no matching property was found.
1507sub extract_propvalue_from_file() {
1508 my ($configfile, $propertynames) = @_;
1509
1510 my $value;
1511 unless(open(FIN, "<$configfile")) {
1512 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1513 return $value; # not initialised
1514 }
1515
1516 # Read the entire file at once, as one single line, then close it
1517 my $filecontents;
1518 {
1519 local $/ = undef;
1520 $filecontents = <FIN>;
1521 }
1522 close(FIN);
1523
1524 foreach my $regex (@$propertynames) {
1525 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1526 if($value) {
1527 $value =~ s/^\"//; # remove any startquotes
1528 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1529 last; # found value for a matching property, break from loop
1530 }
1531 }
1532
1533 return $value;
1534}
1535
1536# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1537# given that perllib is in @INC in order to invoke this subroutine.
1538# Call as follows -- after setting up INC to include perllib and
1539# after setting up GSDLHOME and GSDLOS:
1540#
1541# require util;
1542# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1543#
1544sub setup_greenstone_env() {
1545 my ($GSDLHOME, $GSDLOS) = @_;
1546
1547 #my %env_map = ();
1548 # Get the localised ENV settings of running a localised source setup.bash
1549 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1550 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1551 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
1552 if($GSDLOS =~ m/windows/i) {
1553 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1554 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1555 }
1556 if (!open(PIN, "$perl_command |")) {
1557 print STDERR ("Unable to execute command: $perl_command. $!\n");
1558 }
1559
1560 while (defined (my $perl_output_line = <PIN>)) {
1561 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1562 #$env_map{$key}=$value;
1563 $ENV{$key}=$value;
1564 }
1565
1566 # If any keys in $ENV don't occur in Greenstone's localised env
1567 # (stored in $env_map), delete those entries from $ENV
1568 #foreach $key (keys %ENV) {
1569 # if(!defined $env_map{$key}) {
1570 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1571 # delete $ENV{$key}; # del $ENV(key, value) pair
1572 # }
1573 #}
1574 #undef %env_map;
1575}
1576
15771;
Note: See TracBrowser for help on using the repository browser.