source: main/trunk/greenstone2/perllib/util.pm@ 23388

Last change on this file since 23388 was 23388, checked in by davidb, 13 years ago

Further changes to deal with documents that use different filename encodings on the file-system. Now sets UTF8URL metadata to perform the cross-document look up. Files stored in doc.pm as associated files are now always raw filenames (rather than potentially UTF8 encoded). Storing of filenames seen by HTMLPlug when scanning for files to block on is now done in Unicode aware strings rather than utf8 but unware strings.

  • Property svn:keywords set to Author Date Id Revision
File size: 42.2 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33
34# removes files (but not directories)
35sub rm {
36 my (@files) = @_;
37
38 my @filefiles = ();
39
40 # make sure the files we want to delete exist
41 # and are regular files
42 foreach my $file (@files) {
43 if (!-e $file) {
44 print STDERR "util::rm $file does not exist\n";
45 } elsif ((!-f $file) && (!-l $file)) {
46 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47 } else {
48 push (@filefiles, $file);
49 }
50 }
51
52 # remove the files
53 my $numremoved = unlink @filefiles;
54
55 # check to make sure all of them were removed
56 if ($numremoved != scalar(@filefiles)) {
57 print STDERR "util::rm Not all files were removed\n";
58 }
59}
60
61# removes files (but not directories) - can rename this to the default
62# "rm" subroutine when debugging the deletion of individual files.
63sub rm_debug {
64 my (@files) = @_;
65 my @filefiles = ();
66
67 # make sure the files we want to delete exist
68 # and are regular files
69 foreach my $file (@files) {
70 if (!-e $file) {
71 print STDERR "util::rm $file does not exist\n";
72 } elsif ((!-f $file) && (!-l $file)) {
73 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
74 } else { # debug message
75 unlink($file) or warn "Could not delete file $file: $!\n";
76 }
77 }
78}
79
80
81# recursive removal
82sub filtered_rm_r {
83 my ($files,$file_accept_re,$file_reject_re) = @_;
84
85 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
86
87 # recursively remove the files
88 foreach my $file (@files_array) {
89 $file =~ s/[\/\\]+$//; # remove trailing slashes
90
91 if (!-e $file) {
92 print STDERR "util::filtered_rm_r $file does not exist\n";
93
94 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
95 # get the contents of this directory
96 if (!opendir (INDIR, $file)) {
97 print STDERR "util::filtered_rm_r could not open directory $file\n";
98 } else {
99 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
100 closedir (INDIR);
101
102 # remove all the files in this directory
103 map {$_="$file/$_";} @filedir;
104 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
105
106 if (!defined $file_accept_re && !defined $file_reject_re) {
107 # remove this directory
108 if (!rmdir $file) {
109 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
110 }
111 }
112 }
113 } else {
114 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
115
116 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
117 # remove this file
118 &rm ($file);
119 }
120 }
121 }
122}
123
124
125# recursive removal
126sub rm_r {
127 my (@files) = @_;
128
129 # use the more general (but reterospectively written function
130 # filtered_rm_r function()
131
132 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
133}
134
135
136
137
138# moves a file or a group of files
139sub mv {
140 my $dest = pop (@_);
141 my (@srcfiles) = @_;
142
143 # remove trailing slashes from source and destination files
144 $dest =~ s/[\\\/]+$//;
145 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
146
147 # a few sanity checks
148 if (scalar (@srcfiles) == 0) {
149 print STDERR "util::mv no destination directory given\n";
150 return;
151 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
152 print STDERR "util::mv if multiple source files are given the ".
153 "destination must be a directory\n";
154 return;
155 }
156
157 # move the files
158 foreach my $file (@srcfiles) {
159 my $tempdest = $dest;
160 if (-d $tempdest) {
161 my ($filename) = $file =~ /([^\\\/]+)$/;
162 $tempdest .= "/$filename";
163 }
164 if (!-e $file) {
165 print STDERR "util::mv $file does not exist\n";
166 } else {
167 rename ($file, $tempdest);
168 }
169 }
170}
171
172
173# copies a file or a group of files
174sub cp {
175 my $dest = pop (@_);
176 my (@srcfiles) = @_;
177
178 # remove trailing slashes from source and destination files
179 $dest =~ s/[\\\/]+$//;
180 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
181
182 # a few sanity checks
183 if (scalar (@srcfiles) == 0) {
184 print STDERR "util::cp no destination directory given\n";
185 return;
186 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
187 print STDERR "util::cp if multiple source files are given the ".
188 "destination must be a directory\n";
189 return;
190 }
191
192 # copy the files
193 foreach my $file (@srcfiles) {
194 my $tempdest = $dest;
195 if (-d $tempdest) {
196 my ($filename) = $file =~ /([^\\\/]+)$/;
197 $tempdest .= "/$filename";
198 }
199 if (!-e $file) {
200 print STDERR "util::cp $file does not exist\n";
201 } elsif (!-f $file) {
202 print STDERR "util::cp $file is not a plain file\n";
203 } else {
204 &File::Copy::copy ($file, $tempdest);
205 }
206 }
207}
208
209
210
211# recursively copies a file or group of files
212# syntax: cp_r (sourcefiles, destination directory)
213# destination must be a directory - to copy one file to
214# another use cp instead
215sub cp_r {
216 my $dest = pop (@_);
217 my (@srcfiles) = @_;
218
219 # a few sanity checks
220 if (scalar (@srcfiles) == 0) {
221 print STDERR "util::cp_r no destination directory given\n";
222 return;
223 } elsif (-f $dest) {
224 print STDERR "util::cp_r destination must be a directory\n";
225 return;
226 }
227
228 # create destination directory if it doesn't exist already
229 if (! -d $dest) {
230 my $store_umask = umask(0002);
231 mkdir ($dest, 0777);
232 umask($store_umask);
233 }
234
235 # copy the files
236 foreach my $file (@srcfiles) {
237
238 if (!-e $file) {
239 print STDERR "util::cp_r $file does not exist\n";
240
241 } elsif (-d $file) {
242 # make the new directory
243 my ($filename) = $file =~ /([^\\\/]*)$/;
244 $dest = &util::filename_cat ($dest, $filename);
245 my $store_umask = umask(0002);
246 mkdir ($dest, 0777);
247 umask($store_umask);
248
249 # get the contents of this directory
250 if (!opendir (INDIR, $file)) {
251 print STDERR "util::cp_r could not open directory $file\n";
252 } else {
253 my @filedir = readdir (INDIR);
254 closedir (INDIR);
255 foreach my $f (@filedir) {
256 next if $f =~ /^\.\.?$/;
257 # copy all the files in this directory
258 my $ff = &util::filename_cat ($file, $f);
259 &cp_r ($ff, $dest);
260 }
261 }
262
263 } else {
264 &cp($file, $dest);
265 }
266 }
267}
268# recursively copies a file or group of files
269# syntax: cp_r (sourcefiles, destination directory)
270# destination must be a directory - to copy one file to
271# another use cp instead
272sub cp_r_nosvn {
273 my $dest = pop (@_);
274 my (@srcfiles) = @_;
275
276 # a few sanity checks
277 if (scalar (@srcfiles) == 0) {
278 print STDERR "util::cp_r no destination directory given\n";
279 return;
280 } elsif (-f $dest) {
281 print STDERR "util::cp_r destination must be a directory\n";
282 return;
283 }
284
285 # create destination directory if it doesn't exist already
286 if (! -d $dest) {
287 my $store_umask = umask(0002);
288 mkdir ($dest, 0777);
289 umask($store_umask);
290 }
291
292 # copy the files
293 foreach my $file (@srcfiles) {
294
295 if (!-e $file) {
296 print STDERR "util::cp_r $file does not exist\n";
297
298 } elsif (-d $file) {
299 # make the new directory
300 my ($filename) = $file =~ /([^\\\/]*)$/;
301 $dest = &util::filename_cat ($dest, $filename);
302 my $store_umask = umask(0002);
303 mkdir ($dest, 0777);
304 umask($store_umask);
305
306 # get the contents of this directory
307 if (!opendir (INDIR, $file)) {
308 print STDERR "util::cp_r could not open directory $file\n";
309 } else {
310 my @filedir = readdir (INDIR);
311 closedir (INDIR);
312 foreach my $f (@filedir) {
313 next if $f =~ /^\.\.?$/;
314 next if $f =~ /^\.svn$/;
315 # copy all the files in this directory
316 my $ff = &util::filename_cat ($file, $f);
317 &cp_r ($ff, $dest);
318 }
319 }
320
321 } else {
322 &cp($file, $dest);
323 }
324 }
325}
326
327# copies a directory and its contents, excluding subdirectories, into a new directory
328sub cp_r_toplevel {
329 my $dest = pop (@_);
330 my (@srcfiles) = @_;
331
332 # a few sanity checks
333 if (scalar (@srcfiles) == 0) {
334 print STDERR "util::cp_r no destination directory given\n";
335 return;
336 } elsif (-f $dest) {
337 print STDERR "util::cp_r destination must be a directory\n";
338 return;
339 }
340
341 # create destination directory if it doesn't exist already
342 if (! -d $dest) {
343 my $store_umask = umask(0002);
344 mkdir ($dest, 0777);
345 umask($store_umask);
346 }
347
348 # copy the files
349 foreach my $file (@srcfiles) {
350
351 if (!-e $file) {
352 print STDERR "util::cp_r $file does not exist\n";
353
354 } elsif (-d $file) {
355 # make the new directory
356 my ($filename) = $file =~ /([^\\\/]*)$/;
357 $dest = &util::filename_cat ($dest, $filename);
358 my $store_umask = umask(0002);
359 mkdir ($dest, 0777);
360 umask($store_umask);
361
362 # get the contents of this directory
363 if (!opendir (INDIR, $file)) {
364 print STDERR "util::cp_r could not open directory $file\n";
365 } else {
366 my @filedir = readdir (INDIR);
367 closedir (INDIR);
368 foreach my $f (@filedir) {
369 next if $f =~ /^\.\.?$/;
370
371 # copy all the files in this directory, but not directories
372 my $ff = &util::filename_cat ($file, $f);
373 if (-f $ff) {
374 &cp($ff, $dest);
375 #&cp_r ($ff, $dest);
376 }
377 }
378 }
379
380 } else {
381 &cp($file, $dest);
382 }
383 }
384}
385
386sub mk_dir {
387 my ($dir) = @_;
388
389 my $store_umask = umask(0002);
390 my $mkdir_ok = mkdir ($dir, 0777);
391 umask($store_umask);
392
393 if (!$mkdir_ok)
394 {
395 print STDERR "util::mk_dir could not create directory $dir\n";
396 return;
397 }
398}
399
400# in case anyone cares - I did some testing (using perls Benchmark module)
401# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
402# slightly faster (surprisingly) - Stefan.
403sub mk_all_dir {
404 my ($dir) = @_;
405
406 # use / for the directory separator, remove duplicate and
407 # trailing slashes
408 $dir=~s/[\\\/]+/\//g;
409 $dir=~s/[\\\/]+$//;
410
411 # make sure the cache directory exists
412 my $dirsofar = "";
413 my $first = 1;
414 foreach my $dirname (split ("/", $dir)) {
415 $dirsofar .= "/" unless $first;
416 $first = 0;
417
418 $dirsofar .= $dirname;
419
420 next if $dirname =~ /^(|[a-z]:)$/i;
421 if (!-e $dirsofar)
422 {
423 my $store_umask = umask(0002);
424 my $mkdir_ok = mkdir ($dirsofar, 0777);
425 umask($store_umask);
426 if (!$mkdir_ok)
427 {
428 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
429 return;
430 }
431 }
432 }
433}
434
435# make hard link to file if supported by OS, otherwise copy the file
436sub hard_link {
437 my ($src, $dest, $verbosity) = @_;
438
439 # remove trailing slashes from source and destination files
440 $src =~ s/[\\\/]+$//;
441 $dest =~ s/[\\\/]+$//;
442
443## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
444 # a few sanity checks
445 if (-e $dest) {
446 # destination file already exists
447 return;
448 }
449 elsif (!-e $src) {
450 print STDERR "util::hard_link source file \"$src\" does not exist\n";
451 return 1;
452 }
453 elsif (-d $src) {
454 print STDERR "util::hard_link source \"$src\" is a directory\n";
455 return 1;
456 }
457
458 my $dest_dir = &File::Basename::dirname($dest);
459 mk_all_dir($dest_dir) if (!-e $dest_dir);
460
461
462 if (!link($src, $dest)) {
463 if ((!defined $verbosity) || ($verbosity>2)) {
464 print STDERR "util::hard_link: unable to create hard link. ";
465 print STDERR " Copying file: $src -> $dest\n";
466 }
467 &File::Copy::copy ($src, $dest);
468 }
469 return 0;
470}
471
472# make soft link to file if supported by OS, otherwise copy file
473sub soft_link {
474 my ($src, $dest, $ensure_paths_absolute) = @_;
475
476 # remove trailing slashes from source and destination files
477 $src =~ s/[\\\/]+$//;
478 $dest =~ s/[\\\/]+$//;
479
480 # Ensure file paths are absolute IF requested to do so
481 # Soft_linking didn't work for relative paths
482 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
483 # We need to ensure that the src file is the absolute path
484 # See http://perldoc.perl.org/File/Spec.html
485 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
486 $src = File::Spec->rel2abs($src); # make absolute
487 }
488 # Might as well ensure that the destination file's absolute path is used
489 if(!File::Spec->file_name_is_absolute( $dest )) {
490 $dest = File::Spec->rel2abs($dest); # make absolute
491 }
492 }
493
494 # a few sanity checks
495 if (!-e $src) {
496 print STDERR "util::soft_link source file $src does not exist\n";
497 return 0;
498 }
499
500 my $dest_dir = &File::Basename::dirname($dest);
501 mk_all_dir($dest_dir) if (!-e $dest_dir);
502
503 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
504 # symlink not supported on windows
505 &File::Copy::copy ($src, $dest);
506
507 } elsif (!eval {symlink($src, $dest)}) {
508 print STDERR "util::soft_link: unable to create soft link.\n";
509 return 0;
510 }
511
512 return 1;
513}
514
515# Primarily for filenames generated by processing
516# content of HTML files (which are mapped to UTF-8 internally)
517#
518# To turn this into an octet string that really exists on the file
519# system:
520# 1. don't need to do anything special for Unix-based systems
521# (as underlying file system is byte-code)
522# 2. need to map to short DOS filenames for Windows
523
524sub utf8_to_real_filename
525{
526 my ($utf8_filename) = @_;
527
528 my $real_filename;
529
530 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
531 require Win32;
532
533 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
534
535 my $unicode_filename = decode("utf8",$utf8_filename);
536 $real_filename = Win32::GetShortPathName($unicode_filename);
537 }
538 else {
539 $real_filename = $utf8_filename;
540 }
541
542 return $real_filename;
543}
544
545
546sub fd_exists
547{
548 my $filename_full_path = shift @_;
549 my $test_op = shift @_ || "-e";
550
551 # By default tests for existance of file or directory (-e)
552 # Can be made more specific by providing second parameter (e.g. -f or -d)
553
554 my $exists = 0;
555
556 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
557 require Win32;
558 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
559 if (!defined $filename_short_path) {
560 # Was probably still in UTF8 form (not what is needed on Windows)
561 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
562 if (defined $unicode_filename_full_path) {
563 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
564 }
565 }
566 $filename_full_path = $filename_short_path;
567 }
568
569 if (defined $filename_full_path) {
570 $exists = eval "($test_op \$filename_full_path)";
571 }
572
573 return $exists;
574}
575
576sub file_exists
577{
578 my ($filename_full_path) = @_;
579
580 return fd_exists($filename_full_path,"-f");
581}
582
583sub dir_exists
584{
585 my ($filename_full_path) = @_;
586
587 return fd_exists($filename_full_path,"-d");
588}
589
590
591
592# updates a copy of a directory in some other part of the filesystem
593# verbosity settings are: 0=low, 1=normal, 2=high
594# both $fromdir and $todir should be absolute paths
595sub cachedir {
596 my ($fromdir, $todir, $verbosity) = @_;
597 $verbosity = 1 unless defined $verbosity;
598
599 # use / for the directory separator, remove duplicate and
600 # trailing slashes
601 $fromdir=~s/[\\\/]+/\//g;
602 $fromdir=~s/[\\\/]+$//;
603 $todir=~s/[\\\/]+/\//g;
604 $todir=~s/[\\\/]+$//;
605
606 &mk_all_dir ($todir);
607
608 # get the directories in ascending order
609 if (!opendir (FROMDIR, $fromdir)) {
610 print STDERR "util::cachedir could not read directory $fromdir\n";
611 return;
612 }
613 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
614 closedir (FROMDIR);
615
616 if (!opendir (TODIR, $todir)) {
617 print STDERR "util::cacedir could not read directory $todir\n";
618 return;
619 }
620 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
621 closedir (TODIR);
622
623 my $fromi = 0;
624 my $toi = 0;
625
626 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
627# print "fromi: $fromi toi: $toi\n";
628
629 # see if we should delete a file/directory
630 # this should happen if the file/directory
631 # is not in the from list or if its a different
632 # size, or has an older timestamp
633 if ($toi < scalar(@todir)) {
634 if (($fromi >= scalar(@fromdir)) ||
635 ($todir[$toi] lt $fromdir[$fromi] ||
636 ($todir[$toi] eq $fromdir[$fromi] &&
637 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
638 $verbosity)))) {
639
640 # the files are different
641 &rm_r("$todir/$todir[$toi]");
642 splice(@todir, $toi, 1); # $toi stays the same
643
644 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
645 # the files are the same
646 # if it is a directory, check its contents
647 if (-d "$todir/$todir[$toi]") {
648 &cachedir ("$fromdir/$fromdir[$fromi]",
649 "$todir/$todir[$toi]", $verbosity);
650 }
651
652 $toi++;
653 $fromi++;
654 next;
655 }
656 }
657
658 # see if we should insert a file/directory
659 # we should insert a file/directory if there
660 # is no tofiles left or if the tofile does not exist
661 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
662 $todir[$toi] gt $fromdir[$fromi])) {
663 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
664 splice (@todir, $toi, 0, $fromdir[$fromi]);
665
666 $toi++;
667 $fromi++;
668 }
669 }
670}
671
672# this function returns -1 if either file is not found
673# assumes that $file1 and $file2 are absolute file names or
674# in the current directory
675# $file2 is allowed to be newer than $file1
676sub differentfiles {
677 my ($file1, $file2, $verbosity) = @_;
678 $verbosity = 1 unless defined $verbosity;
679
680 $file1 =~ s/\/+$//;
681 $file2 =~ s/\/+$//;
682
683 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
684 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
685
686 return -1 unless (-e $file1 && -e $file2);
687 if ($file1name ne $file2name) {
688 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
689 return 1;
690 }
691
692 my @file1stat = stat ($file1);
693 my @file2stat = stat ($file2);
694
695 if (-d $file1) {
696 if (! -d $file2) {
697 print STDERR "one file is a directory\n" if ($verbosity >= 2);
698 return 1;
699 }
700 return 0;
701 }
702
703 # both must be regular files
704 unless (-f $file1 && -f $file2) {
705 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
706 return 1;
707 }
708
709 # the size of the files must be the same
710 if ($file1stat[7] != $file2stat[7]) {
711 print STDERR "different sized files\n" if ($verbosity >= 2);
712 return 1;
713 }
714
715 # the second file cannot be older than the first
716 if ($file1stat[9] > $file2stat[9]) {
717 print STDERR "file is older\n" if ($verbosity >= 2);
718 return 1;
719 }
720
721 return 0;
722}
723
724
725sub get_tmp_filename
726{
727 my $file_ext = shift(@_) || undef;
728
729 my $opt_dot_file_ext = "";
730 if (defined $file_ext) {
731 if ($file_ext !~ m/\./) {
732 # no dot, so needs one added in at start
733 $opt_dot_file_ext = ".$file_ext"
734 }
735 else {
736 # allow for "extensions" such as _metadata.txt to be handled
737 # gracefully
738 $opt_dot_file_ext = $file_ext;
739 }
740 }
741
742 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
743 &mk_all_dir ($tmpdir) unless -e $tmpdir;
744
745 my $count = 1000;
746 my $rand = int(rand $count);
747 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
748
749 while (-e $full_tmp_filename) {
750 $rand = int(rand $count);
751 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
752 $count++;
753 }
754
755 return $full_tmp_filename;
756}
757
758sub get_timestamped_tmp_folder
759{
760
761 my $tmp_dirname;
762 if(defined $ENV{'GSDLCOLLECTDIR'}) {
763 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
764 } elsif(defined $ENV{'GSDLHOME'}) {
765 $tmp_dirname = $ENV{'GSDLHOME'};
766 } else {
767 return undef;
768 }
769
770 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
771 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
772
773 # add the timestamp into the path otherwise we can run into problems
774 # if documents have the same name
775 my $timestamp = time;
776 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
777 $tmp_dirname = $time_tmp_dirname;
778 my $i = 1;
779 while (-e $tmp_dirname) {
780 $tmp_dirname = "$time_tmp_dirname$i";
781 $i++;
782 }
783 &util::mk_dir($tmp_dirname);
784
785 return $tmp_dirname;
786}
787
788sub get_timestamped_tmp_filename_in_collection
789{
790
791 my ($input_filename, $output_ext) = @_;
792 # derive tmp filename from input filename
793 my ($tailname, $dirname, $suffix)
794 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
795
796 # softlink to collection tmp dir
797 my $tmp_dirname = &util::get_timestamped_tmp_folder();
798 $tmp_dirname = $dirname unless defined $tmp_dirname;
799
800 # following two steps copied from ConvertBinaryFile
801 # do we need them?? can't use them as is, as they use plugin methods.
802
803 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
804
805 # URLEncode this since htmls with images where the html filename is utf8 don't seem
806 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
807 # files on the filesystem.
808 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
809 if (defined $output_ext) {
810 $output_ext = ".$output_ext"; # add the dot
811 } else {
812 $output_ext = $suffix;
813 }
814 $output_ext= lc($output_ext);
815 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
816
817 return $tmp_filename;
818}
819
820sub get_toplevel_tmp_dir
821{
822 return filename_cat($ENV{'GSDLHOME'}, "tmp");
823}
824
825
826sub filename_to_regex {
827 my $filename = shift (@_);
828
829 # need to put single backslash back to double so that regex works
830 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
831 $filename =~ s/\\/\\\\/g;
832 }
833 return $filename;
834}
835
836sub filename_cat {
837 my $first_file = shift(@_);
838 my (@filenames) = @_;
839
840# Useful for debugging
841# -- might make sense to call caller(0) rather than (1)??
842# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
843# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
844
845 # If first_file is not null or empty, then add it back into the list
846 if (defined $first_file && $first_file =~ /\S/) {
847 unshift(@filenames, $first_file);
848 }
849
850 my $filename = join("/", @filenames);
851
852 # remove duplicate slashes and remove the last slash
853 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
854 $filename =~ s/[\\\/]+/\\/g;
855 } else {
856 $filename =~ s/[\/]+/\//g;
857 # DB: want a filename abc\de.html to remain like this
858 }
859 $filename =~ s/[\\\/]$//;
860
861 return $filename;
862}
863
864
865sub pathname_cat {
866 my $first_path = shift(@_);
867 my (@pathnames) = @_;
868
869 # If first_path is not null or empty, then add it back into the list
870 if (defined $first_path && $first_path =~ /\S/) {
871 unshift(@pathnames, $first_path);
872 }
873
874 my $join_char;
875 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
876 $join_char = ";";
877 } else {
878 $join_char = ":";
879 }
880
881 my $pathname = join($join_char, @pathnames);
882
883 # remove duplicate slashes
884 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
885 $pathname =~ s/[\\\/]+/\\/g;
886 } else {
887 $pathname =~ s/[\/]+/\//g;
888 # DB: want a pathname abc\de.html to remain like this
889 }
890
891 return $pathname;
892}
893
894
895sub tidy_up_oid {
896 my ($OID) = @_;
897 if ($OID =~ /\./) {
898 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
899 $OID =~ s/\.//g; #remove any periods
900 }
901 if ($OID =~ /^\s.*\s$/) {
902 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
903 # remove starting and trailing whitespace
904 $OID =~ s/^\s+//;
905 $OID =~ s/\s+$//;
906 }
907 if ($OID =~ /^[\d]*$/) {
908 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
909 $OID = "D" . $OID;
910 }
911
912 return $OID;
913}
914sub envvar_prepend {
915 my ($var,$val) = @_;
916
917 # do not prepend any value/path that's already in the environment variable
918 if ($ENV{'GSDLOS'} =~ /^windows$/i)
919 {
920 my $escaped_val = $val;
921 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
922 if (!defined($ENV{$var})) {
923 $ENV{$var} = "$val";
924 }
925 elsif($ENV{$var} !~ m/$escaped_val/) {
926 $ENV{$var} = "$val;".$ENV{$var};
927 }
928 }
929 else {
930 if (!defined($ENV{$var})) {
931 $ENV{$var} = "$val";
932 }
933 elsif($ENV{$var} !~ m/$val/) {
934 $ENV{$var} = "$val:".$ENV{$var};
935 }
936 }
937}
938
939sub envvar_append {
940 my ($var,$val) = @_;
941
942 # do not append any value/path that's already in the environment variable
943 if ($ENV{'GSDLOS'} =~ /^windows$/i)
944 {
945 my $escaped_val = $val;
946 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
947 if (!defined($ENV{$var})) {
948 $ENV{$var} = "$val";
949 }
950 elsif($ENV{$var} !~ m/$escaped_val/) {
951 $ENV{$var} .= ";$val";
952 }
953 }
954 else {
955 if (!defined($ENV{$var})) {
956 $ENV{$var} = "$val";
957 }
958 elsif($ENV{$var} !~ m/$val/) {
959 $ENV{$var} .= ":$val";
960 }
961 }
962}
963
964
965# splits a filename into a prefix and a tail extension using the tail_re, or
966# if that fails, splits on the file_extension . (dot)
967sub get_prefix_and_tail_by_regex {
968
969 my ($filename,$tail_re) = @_;
970
971 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
972 if ((!defined $file_prefix) || (!defined $file_ext)) {
973 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
974 }
975
976 return ($file_prefix,$file_ext);
977}
978
979# get full path and file only path from a base_dir (which may be empty) and
980# file (which may contain directories)
981sub get_full_filenames {
982 my ($base_dir, $file) = @_;
983
984 my $filename_full_path = $file;
985 # add on directory if present
986 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
987
988 my $filename_no_path = $file;
989
990 # remove directory if present
991 $filename_no_path =~ s/^.*[\/\\]//;
992 return ($filename_full_path, $filename_no_path);
993}
994
995# returns the path of a file without the filename -- ie. the directory the file is in
996sub filename_head {
997 my $filename = shift(@_);
998
999 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1000 $filename =~ s/[^\\\\]*$//;
1001 }
1002 else {
1003 $filename =~ s/[^\\\/]*$//;
1004 }
1005
1006 return $filename;
1007}
1008
1009
1010
1011# returns 1 if filename1 and filename2 point to the same
1012# file or directory
1013sub filenames_equal {
1014 my ($filename1, $filename2) = @_;
1015
1016 # use filename_cat to clean up trailing slashes and
1017 # multiple slashes
1018 $filename1 = filename_cat ($filename1);
1019 $filename2 = filename_cat ($filename2);
1020
1021 # filenames not case sensitive on windows
1022 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1023 $filename1 =~ tr/[A-Z]/[a-z]/;
1024 $filename2 =~ tr/[A-Z]/[a-z]/;
1025 }
1026 return 1 if $filename1 eq $filename2;
1027 return 0;
1028}
1029
1030
1031sub filename_within_directory
1032{
1033 my ($filename,$within_dir) = @_;
1034
1035 if ($within_dir !~ m/[\/\\]$/) {
1036 my $dirsep = &util::get_dirsep();
1037 $within_dir .= $dirsep;
1038 }
1039
1040 $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator
1041
1042 if ($filename =~ m/^$within_dir(.*)$/) {
1043 $filename = $1;
1044 }
1045
1046 return $filename;
1047}
1048
1049sub filename_within_collection
1050{
1051 my ($filename) = @_;
1052
1053 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1054
1055 if (defined $collect_dir) {
1056
1057 # if from within GSDLCOLLECTDIR, then remove directory prefix
1058 # so source_filename is realative to it. This is done to aid
1059 # portability, i.e. the collection can be moved to somewhere
1060 # else on the file system and the archives directory will still
1061 # work. This is needed, for example in the applet version of
1062 # GLI where GSDLHOME/collect on the server will be different to
1063 # the collect directory of the remove user. Of course,
1064 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1065 # it back into a full pathname.
1066
1067 $filename = filename_within_directory($filename,$collect_dir);
1068 }
1069
1070 return $filename;
1071}
1072
1073sub prettyprint_file
1074{
1075 my ($base_dir,$file) = @_;
1076
1077 my $filename_full_path = &util::filename_cat($base_dir,$file);
1078
1079 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1080 require Win32;
1081
1082 # For some reason base_dir in the form c:/a/b/c
1083 # This leads to confusion later on, so turn it back into
1084 # the more usual Windows form
1085 $base_dir =~ s/\//\\/g;
1086 my $long_base_dir = Win32::GetLongPathName($base_dir);
1087 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1088
1089 $file = filename_within_directory($long_full_path,$long_base_dir);
1090 }
1091
1092 return $file;
1093}
1094
1095
1096sub upgrade_if_dos_filename
1097{
1098 my ($filename_full_path,$and_encode) = @_;
1099
1100 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1101 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1102 # to its long (Windows) version
1103 $filename_full_path = Win32::GetLongPathName($filename_full_path);
1104 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1105 $filename_full_path =~ s/^(.)/\l$1/;
1106 if ((defined $and_encode) && ($and_encode)) {
1107 $filename_full_path = encode("utf8",$filename_full_path);
1108 }
1109 }
1110
1111 return $filename_full_path;
1112}
1113
1114
1115sub downgrade_if_dos_filename
1116{
1117 my ($filename_full_path) = @_;
1118
1119 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1120 require Win32;
1121
1122 # Ensure the given long Windows filename is in a form that can
1123 # be opened by Perl => convert it to a short DOS-like filename
1124
1125## print STDERR "**** asked to downgrade: $filename_full_path\n";
1126
1127 $filename_full_path = Win32::GetShortPathName($filename_full_path);
1128
1129## print STDERR "**** now: $filename_full_path\n";
1130
1131 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1132 $filename_full_path =~ s/^(.)/\l$1/;
1133 }
1134
1135 return $filename_full_path;
1136}
1137
1138
1139sub filename_is_absolute
1140{
1141 my ($filename) = @_;
1142
1143 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1144 return ($filename =~ m/^(\w:)?\\/);
1145 }
1146 else {
1147 return ($filename =~ m/^\//);
1148 }
1149}
1150
1151
1152## @method make_absolute()
1153#
1154# Ensure the given file path is absolute in respect to the given base path.
1155#
1156# @param $base_dir A string denoting the base path the given dir must be
1157# absolute to.
1158# @param $dir The directory to be made absolute as a string. Note that the
1159# dir may already be absolute, in which case it will remain
1160# unchanged.
1161# @return The now absolute form of the directory as a string.
1162#
1163# @author John Thompson, DL Consulting Ltd.
1164# @copy 2006 DL Consulting Ltd.
1165#
1166#used in buildcol.pl, doesn't work for all cases --kjdon
1167sub make_absolute {
1168
1169 my ($base_dir, $dir) = @_;
1170### print STDERR "dir = $dir\n";
1171 $dir =~ s/[\\\/]+/\//g;
1172 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1173 $dir =~ s|^/tmp_mnt||;
1174 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1175 $dir =~ s|/[.][.]?/|/|g;
1176 $dir =~ tr|/|/|s;
1177### print STDERR "dir = $dir\n";
1178
1179 return $dir;
1180}
1181## make_absolute() ##
1182
1183sub get_dirsep {
1184
1185 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1186 return "\\";
1187 } else {
1188 return "\/";
1189 }
1190}
1191
1192sub get_os_dirsep {
1193
1194 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1195 return "\\\\";
1196 } else {
1197 return "\\\/";
1198 }
1199}
1200
1201sub get_re_dirsep {
1202
1203 return "\\\\|\\\/";
1204}
1205
1206
1207sub get_dirsep_tail {
1208 my ($filename) = @_;
1209
1210 # returns last part of directory or filename
1211 # On unix e.g. a/b.d => b.d
1212 # a/b/c => c
1213
1214 my $dirsep = get_re_dirsep();
1215 my @dirs = split (/$dirsep/, $filename);
1216 my $tail = pop @dirs;
1217
1218 # - caused problems under windows
1219 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1220
1221 return $tail;
1222}
1223
1224
1225# if this is running on windows we want binaries to end in
1226# .exe, otherwise they don't have to end in any extension
1227sub get_os_exe {
1228 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1229 return "";
1230}
1231
1232
1233# test to see whether this is a big or little endian machine
1234sub is_little_endian
1235{
1236 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1237 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1238 # Otherwise, it's little endian
1239
1240 #return 0 if $^O =~ /^darwin$/i;
1241 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1242
1243 # Going back to stating exactly whether the machine is little endian
1244 # or big endian, without any special case for Macs. Since for rata it comes
1245 # back with little endian and for shuttle with bigendian.
1246 return (ord(substr(pack("s",1), 0, 1)) == 1);
1247}
1248
1249
1250# will return the collection name if successful, "" otherwise
1251sub use_collection {
1252 my ($collection, $collectdir) = @_;
1253
1254 if (!defined $collectdir || $collectdir eq "") {
1255 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1256 }
1257
1258 # get and check the collection
1259 if (!defined($collection) || $collection eq "") {
1260 if (defined $ENV{'GSDLCOLLECTION'}) {
1261 $collection = $ENV{'GSDLCOLLECTION'};
1262 } else {
1263 print STDOUT "No collection specified\n";
1264 return "";
1265 }
1266 }
1267
1268 if ($collection eq "modelcol") {
1269 print STDOUT "You can't use modelcol.\n";
1270 return "";
1271 }
1272
1273 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1274 # are defined
1275 $ENV{'GSDLCOLLECTION'} = $collection;
1276 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1277
1278 # make sure this collection exists
1279 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1280 print STDOUT "Invalid collection ($collection).\n";
1281 return "";
1282 }
1283
1284 # everything is ready to go
1285 return $collection;
1286}
1287
1288sub get_current_collection_name {
1289 return $ENV{'GSDLCOLLECTION'};
1290}
1291
1292
1293# will return the collection name if successful, "" otherwise.
1294# Like use_collection (above) but for greenstone 3 (taking account of site level)
1295
1296sub use_site_collection {
1297 my ($site, $collection, $collectdir) = @_;
1298
1299 if (!defined $collectdir || $collectdir eq "") {
1300 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1301 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1302 }
1303
1304 # collectdir explicitly set by this point (using $site variable if required).
1305 # Can call "old" gsdl2 use_collection now.
1306
1307 return use_collection($collection,$collectdir);
1308}
1309
1310
1311
1312sub locate_config_file
1313{
1314 my ($file) = @_;
1315
1316 my $locations = locate_config_files($file);
1317
1318 return shift @$locations; # returns undef if 'locations' is empty
1319}
1320
1321
1322sub locate_config_files
1323{
1324 my ($file) = @_;
1325
1326 my @locations = ();
1327
1328 if (-e $file) {
1329 # Clearly specified (most likely full filename)
1330 # No need to hunt in 'etc' directories, return value unchanged
1331 push(@locations,$file);
1332 }
1333 else {
1334 # Check for collection specific one before looking in global GSDL 'etc'
1335 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1336 my $test_collect_etc_filename
1337 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1338
1339 if (-e $test_collect_etc_filename) {
1340 push(@locations,$test_collect_etc_filename);
1341 }
1342 }
1343 my $test_main_etc_filename
1344 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1345 if (-e $test_main_etc_filename) {
1346 push(@locations,$test_main_etc_filename);
1347 }
1348 }
1349
1350 return \@locations;
1351}
1352
1353
1354sub hyperlink_text
1355{
1356 my ($text) = @_;
1357
1358 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1359 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1360
1361 return $text;
1362}
1363
1364
1365# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1366# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1367sub is_dir_empty
1368{
1369 my ($path) = @_;
1370 opendir DIR, $path;
1371 while(my $entry = readdir DIR) {
1372 next if($entry =~ /^\.\.?$/);
1373 closedir DIR;
1374 return 0;
1375 }
1376 closedir DIR;
1377 return 1;
1378}
1379
1380# Returns the given filename converted using either URL encoding or base64
1381# encoding, as specified by $rename_method. If the given filename has no suffix
1382# (if it is just the tailname), then $no_suffix should be some defined value.
1383# rename_method can be url, none, base64
1384sub rename_file {
1385 my ($filename, $rename_method, $no_suffix) = @_;
1386
1387 if(!$filename) { # undefined or empty string
1388 return $filename;
1389 }
1390
1391 if (!$rename_method) {
1392 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1393 # Debugging information
1394 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1395 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1396 $rename_method = "url";
1397 } elsif($rename_method eq "none") {
1398 return $filename; # would have already been renamed
1399 }
1400
1401 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1402 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1403 ###$filename =~ s/ /_/g;
1404
1405 my ($tailname,$dirname,$suffix);
1406 if($no_suffix) { # given a tailname, no suffix
1407 ($tailname,$dirname) = File::Basename::fileparse($filename);
1408 }
1409 else {
1410 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1411 }
1412 if (!$suffix) {
1413 $suffix = "";
1414 }
1415 else {
1416 $suffix = lc($suffix);
1417 }
1418
1419 if ($rename_method eq "url") {
1420 $tailname = &unicode::url_encode($tailname);
1421 }
1422 elsif ($rename_method eq "base64") {
1423 $tailname = &unicode::base64_encode($tailname);
1424 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1425 }
1426
1427 $filename = "$tailname$suffix";
1428 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1429
1430 return $filename;
1431}
1432
1433
1434# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1435sub rename_ldb_or_bdb_file {
1436 my ($filename_no_ext) = @_;
1437
1438 my $new_filename = "$filename_no_ext.gdb";
1439 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1440 # try ldb
1441 my $old_filename = "$filename_no_ext.ldb";
1442
1443 if (-f $old_filename) {
1444 print STDERR "Renaming $old_filename to $new_filename\n";
1445 rename ($old_filename, $new_filename)
1446 || print STDERR "Rename failed: $!\n";
1447 return;
1448 }
1449 # try bdb
1450 $old_filename = "$filename_no_ext.bdb";
1451 if (-f $old_filename) {
1452 print STDERR "Renaming $old_filename to $new_filename\n";
1453 rename ($old_filename, $new_filename)
1454 || print STDERR "Rename failed: $!\n";
1455 return;
1456 }
1457}
1458
1459
1460# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1461# By default, /greenstone3 for GS3 or /greenstone for GS2.
1462sub get_greenstone_url_prefix() {
1463 # if already set on a previous occasion, just return that
1464 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1465 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1466
1467 my ($configfile, $urlprefix, $defaultUrlprefix);
1468 my @propertynames = ();
1469
1470 if($ENV{'GSDL3SRCHOME'}) {
1471 $defaultUrlprefix = "/greenstone3";
1472 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1473 push(@propertynames, qw/path\s*\=/);
1474 } else {
1475 $defaultUrlprefix = "/greenstone";
1476 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1477 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1478 }
1479
1480 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1481
1482 if(!$urlprefix) { # no values found for URL prefix, use default values
1483 $urlprefix = $defaultUrlprefix;
1484 } else {
1485 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1486 $urlprefix =~ s/^\///; # remove the starting slash
1487 my @dirs = split(/(\\|\/)/, $urlprefix);
1488 $urlprefix = shift(@dirs);
1489
1490 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1491 $urlprefix = "/$urlprefix";
1492 }
1493 }
1494
1495 # set for the future
1496 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1497# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1498 return $urlprefix;
1499}
1500
1501
1502# Given a config file (xml or java properties file) and a list/array of regular expressions
1503# that represent property names to match on, this function will return the value for the 1st
1504# matching property name. If the return value is undefined, no matching property was found.
1505sub extract_propvalue_from_file() {
1506 my ($configfile, $propertynames) = @_;
1507
1508 my $value;
1509 unless(open(FIN, "<$configfile")) {
1510 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1511 return $value; # not initialised
1512 }
1513
1514 # Read the entire file at once, as one single line, then close it
1515 my $filecontents;
1516 {
1517 local $/ = undef;
1518 $filecontents = <FIN>;
1519 }
1520 close(FIN);
1521
1522 foreach my $regex (@$propertynames) {
1523 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1524 if($value) {
1525 $value =~ s/^\"//; # remove any startquotes
1526 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1527 last; # found value for a matching property, break from loop
1528 }
1529 }
1530
1531 return $value;
1532}
1533
1534# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1535# given that perllib is in @INC in order to invoke this subroutine.
1536# Call as follows -- after setting up INC to include perllib and
1537# after setting up GSDLHOME and GSDLOS:
1538#
1539# require util;
1540# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1541#
1542sub setup_greenstone_env() {
1543 my ($GSDLHOME, $GSDLOS) = @_;
1544
1545 #my %env_map = ();
1546 # Get the localised ENV settings of running a localised source setup.bash
1547 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1548 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1549 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
1550 if($GSDLOS =~ m/windows/i) {
1551 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1552 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1553 }
1554 if (!open(PIN, "$perl_command |")) {
1555 print STDERR ("Unable to execute command: $perl_command. $!\n");
1556 }
1557
1558 while (defined (my $perl_output_line = <PIN>)) {
1559 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1560 #$env_map{$key}=$value;
1561 $ENV{$key}=$value;
1562 }
1563
1564 # If any keys in $ENV don't occur in Greenstone's localised env
1565 # (stored in $env_map), delete those entries from $ENV
1566 #foreach $key (keys %ENV) {
1567 # if(!defined $env_map{$key}) {
1568 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1569 # delete $ENV{$key}; # del $ENV(key, value) pair
1570 # }
1571 #}
1572 #undef %env_map;
1573}
1574
15751;
Note: See TracBrowser for help on using the repository browser.