source: main/trunk/greenstone2/perllib/util.pm@ 24563

Last change on this file since 24563 was 24563, checked in by ak19, 13 years ago
  1. On Windows, Depositor needs env var OS for the call to util::setup_greenstone_env() from bin\script\build to work (it will try to run setup.bat; if OS is set, it will be Windows_NT and if it's not set, it will default to Win95 and try to run a COMMAND statement that conflicts with the Depositor's building process. The fix is to let Apache have access to the env var OS, by using PassEnv. 2. Fixed oversight of not closing filehandle in util::setup_greenstone_env().
  • Property svn:keywords set to Author Date Id Revision
File size: 44.2 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41 my (@files) = @_;
42
43 my @filefiles = ();
44
45 # make sure the files we want to delete exist
46 # and are regular files
47 foreach my $file (@files) {
48 if (!-e $file) {
49 print STDERR "util::rm $file does not exist\n";
50 } elsif ((!-f $file) && (!-l $file)) {
51 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52 } else {
53 push (@filefiles, $file);
54 }
55 }
56
57 # remove the files
58 my $numremoved = unlink @filefiles;
59
60 # check to make sure all of them were removed
61 if ($numremoved != scalar(@filefiles)) {
62 print STDERR "util::rm Not all files were removed\n";
63 }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine when debugging the deletion of individual files.
68sub rm_debug {
69 my (@files) = @_;
70 my @filefiles = ();
71
72 # make sure the files we want to delete exist
73 # and are regular files
74 foreach my $file (@files) {
75 if (!-e $file) {
76 print STDERR "util::rm $file does not exist\n";
77 } elsif ((!-f $file) && (!-l $file)) {
78 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79 } else { # debug message
80 unlink($file) or warn "Could not delete file $file: $!\n";
81 }
82 }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88 my ($files,$file_accept_re,$file_reject_re) = @_;
89
90# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92# print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93
94 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96 # recursively remove the files
97 foreach my $file (@files_array) {
98 $file =~ s/[\/\\]+$//; # remove trailing slashes
99
100 if (!-e $file) {
101 print STDERR "util::filtered_rm_r $file does not exist\n";
102
103 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104 # get the contents of this directory
105 if (!opendir (INDIR, $file)) {
106 print STDERR "util::filtered_rm_r could not open directory $file\n";
107 } else {
108 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109 closedir (INDIR);
110
111 # remove all the files in this directory
112 map {$_="$file/$_";} @filedir;
113 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115 if (!defined $file_accept_re && !defined $file_reject_re) {
116 # remove this directory
117 if (!rmdir $file) {
118 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119 }
120 }
121 }
122 } else {
123 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126 # remove this file
127 &rm ($file);
128 }
129 }
130 }
131}
132
133
134# recursive removal
135sub rm_r {
136 my (@files) = @_;
137
138 # use the more general (but reterospectively written function
139 # filtered_rm_r function()
140
141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149 my $dest = pop (@_);
150 my (@srcfiles) = @_;
151
152 # remove trailing slashes from source and destination files
153 $dest =~ s/[\\\/]+$//;
154 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156 # a few sanity checks
157 if (scalar (@srcfiles) == 0) {
158 print STDERR "util::mv no destination directory given\n";
159 return;
160 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161 print STDERR "util::mv if multiple source files are given the ".
162 "destination must be a directory\n";
163 return;
164 }
165
166 # move the files
167 foreach my $file (@srcfiles) {
168 my $tempdest = $dest;
169 if (-d $tempdest) {
170 my ($filename) = $file =~ /([^\\\/]+)$/;
171 $tempdest .= "/$filename";
172 }
173 if (!-e $file) {
174 print STDERR "util::mv $file does not exist\n";
175 } else {
176 rename ($file, $tempdest);
177 }
178 }
179}
180
181
182# copies a file or a group of files
183sub cp {
184 my $dest = pop (@_);
185 my (@srcfiles) = @_;
186
187 # remove trailing slashes from source and destination files
188 $dest =~ s/[\\\/]+$//;
189 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
190
191 # a few sanity checks
192 if (scalar (@srcfiles) == 0) {
193 print STDERR "util::cp no destination directory given\n";
194 return;
195 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
196 print STDERR "util::cp if multiple source files are given the ".
197 "destination must be a directory\n";
198 return;
199 }
200
201 # copy the files
202 foreach my $file (@srcfiles) {
203 my $tempdest = $dest;
204 if (-d $tempdest) {
205 my ($filename) = $file =~ /([^\\\/]+)$/;
206 $tempdest .= "/$filename";
207 }
208 if (!-e $file) {
209 print STDERR "util::cp $file does not exist\n";
210 } elsif (!-f $file) {
211 print STDERR "util::cp $file is not a plain file\n";
212 } else {
213 &File::Copy::copy ($file, $tempdest);
214 }
215 }
216}
217
218
219
220# recursively copies a file or group of files
221# syntax: cp_r (sourcefiles, destination directory)
222# destination must be a directory - to copy one file to
223# another use cp instead
224sub cp_r {
225 my $dest = pop (@_);
226 my (@srcfiles) = @_;
227
228 # a few sanity checks
229 if (scalar (@srcfiles) == 0) {
230 print STDERR "util::cp_r no destination directory given\n";
231 return;
232 } elsif (-f $dest) {
233 print STDERR "util::cp_r destination must be a directory\n";
234 return;
235 }
236
237 # create destination directory if it doesn't exist already
238 if (! -d $dest) {
239 my $store_umask = umask(0002);
240 mkdir ($dest, 0777);
241 umask($store_umask);
242 }
243
244 # copy the files
245 foreach my $file (@srcfiles) {
246
247 if (!-e $file) {
248 print STDERR "util::cp_r $file does not exist\n";
249
250 } elsif (-d $file) {
251 # make the new directory
252 my ($filename) = $file =~ /([^\\\/]*)$/;
253 $dest = &util::filename_cat ($dest, $filename);
254 my $store_umask = umask(0002);
255 mkdir ($dest, 0777);
256 umask($store_umask);
257
258 # get the contents of this directory
259 if (!opendir (INDIR, $file)) {
260 print STDERR "util::cp_r could not open directory $file\n";
261 } else {
262 my @filedir = readdir (INDIR);
263 closedir (INDIR);
264 foreach my $f (@filedir) {
265 next if $f =~ /^\.\.?$/;
266 # copy all the files in this directory
267 my $ff = &util::filename_cat ($file, $f);
268 &cp_r ($ff, $dest);
269 }
270 }
271
272 } else {
273 &cp($file, $dest);
274 }
275 }
276}
277# recursively copies a file or group of files
278# syntax: cp_r (sourcefiles, destination directory)
279# destination must be a directory - to copy one file to
280# another use cp instead
281sub cp_r_nosvn {
282 my $dest = pop (@_);
283 my (@srcfiles) = @_;
284
285 # a few sanity checks
286 if (scalar (@srcfiles) == 0) {
287 print STDERR "util::cp_r no destination directory given\n";
288 return;
289 } elsif (-f $dest) {
290 print STDERR "util::cp_r destination must be a directory\n";
291 return;
292 }
293
294 # create destination directory if it doesn't exist already
295 if (! -d $dest) {
296 my $store_umask = umask(0002);
297 mkdir ($dest, 0777);
298 umask($store_umask);
299 }
300
301 # copy the files
302 foreach my $file (@srcfiles) {
303
304 if (!-e $file) {
305 print STDERR "util::cp_r $file does not exist\n";
306
307 } elsif (-d $file) {
308 # make the new directory
309 my ($filename) = $file =~ /([^\\\/]*)$/;
310 $dest = &util::filename_cat ($dest, $filename);
311 my $store_umask = umask(0002);
312 mkdir ($dest, 0777);
313 umask($store_umask);
314
315 # get the contents of this directory
316 if (!opendir (INDIR, $file)) {
317 print STDERR "util::cp_r could not open directory $file\n";
318 } else {
319 my @filedir = readdir (INDIR);
320 closedir (INDIR);
321 foreach my $f (@filedir) {
322 next if $f =~ /^\.\.?$/;
323 next if $f =~ /^\.svn$/;
324 # copy all the files in this directory
325 my $ff = &util::filename_cat ($file, $f);
326 &cp_r ($ff, $dest);
327 }
328 }
329
330 } else {
331 &cp($file, $dest);
332 }
333 }
334}
335
336# copies a directory and its contents, excluding subdirectories, into a new directory
337sub cp_r_toplevel {
338 my $dest = pop (@_);
339 my (@srcfiles) = @_;
340
341 # a few sanity checks
342 if (scalar (@srcfiles) == 0) {
343 print STDERR "util::cp_r no destination directory given\n";
344 return;
345 } elsif (-f $dest) {
346 print STDERR "util::cp_r destination must be a directory\n";
347 return;
348 }
349
350 # create destination directory if it doesn't exist already
351 if (! -d $dest) {
352 my $store_umask = umask(0002);
353 mkdir ($dest, 0777);
354 umask($store_umask);
355 }
356
357 # copy the files
358 foreach my $file (@srcfiles) {
359
360 if (!-e $file) {
361 print STDERR "util::cp_r $file does not exist\n";
362
363 } elsif (-d $file) {
364 # make the new directory
365 my ($filename) = $file =~ /([^\\\/]*)$/;
366 $dest = &util::filename_cat ($dest, $filename);
367 my $store_umask = umask(0002);
368 mkdir ($dest, 0777);
369 umask($store_umask);
370
371 # get the contents of this directory
372 if (!opendir (INDIR, $file)) {
373 print STDERR "util::cp_r could not open directory $file\n";
374 } else {
375 my @filedir = readdir (INDIR);
376 closedir (INDIR);
377 foreach my $f (@filedir) {
378 next if $f =~ /^\.\.?$/;
379
380 # copy all the files in this directory, but not directories
381 my $ff = &util::filename_cat ($file, $f);
382 if (-f $ff) {
383 &cp($ff, $dest);
384 #&cp_r ($ff, $dest);
385 }
386 }
387 }
388
389 } else {
390 &cp($file, $dest);
391 }
392 }
393}
394
395sub mk_dir {
396 my ($dir) = @_;
397
398 my $store_umask = umask(0002);
399 my $mkdir_ok = mkdir ($dir, 0777);
400 umask($store_umask);
401
402 if (!$mkdir_ok)
403 {
404 print STDERR "util::mk_dir could not create directory $dir\n";
405 return;
406 }
407}
408
409# in case anyone cares - I did some testing (using perls Benchmark module)
410# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
411# slightly faster (surprisingly) - Stefan.
412sub mk_all_dir {
413 my ($dir) = @_;
414
415 # use / for the directory separator, remove duplicate and
416 # trailing slashes
417 $dir=~s/[\\\/]+/\//g;
418 $dir=~s/[\\\/]+$//;
419
420 # make sure the cache directory exists
421 my $dirsofar = "";
422 my $first = 1;
423 foreach my $dirname (split ("/", $dir)) {
424 $dirsofar .= "/" unless $first;
425 $first = 0;
426
427 $dirsofar .= $dirname;
428
429 next if $dirname =~ /^(|[a-z]:)$/i;
430 if (!-e $dirsofar)
431 {
432 my $store_umask = umask(0002);
433 my $mkdir_ok = mkdir ($dirsofar, 0777);
434 umask($store_umask);
435 if (!$mkdir_ok)
436 {
437 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
438 return;
439 }
440 }
441 }
442}
443
444# make hard link to file if supported by OS, otherwise copy the file
445sub hard_link {
446 my ($src, $dest, $verbosity) = @_;
447
448 # remove trailing slashes from source and destination files
449 $src =~ s/[\\\/]+$//;
450 $dest =~ s/[\\\/]+$//;
451
452## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
453 # a few sanity checks
454 if (-e $dest) {
455 # destination file already exists
456 return;
457 }
458 elsif (!-e $src) {
459 print STDERR "util::hard_link source file \"$src\" does not exist\n";
460 return 1;
461 }
462 elsif (-d $src) {
463 print STDERR "util::hard_link source \"$src\" is a directory\n";
464 return 1;
465 }
466
467 my $dest_dir = &File::Basename::dirname($dest);
468 mk_all_dir($dest_dir) if (!-e $dest_dir);
469
470
471 if (!link($src, $dest)) {
472 if ((!defined $verbosity) || ($verbosity>2)) {
473 print STDERR "util::hard_link: unable to create hard link. ";
474 print STDERR " Copying file: $src -> $dest\n";
475 }
476 &File::Copy::copy ($src, $dest);
477 }
478 return 0;
479}
480
481# make soft link to file if supported by OS, otherwise copy file
482sub soft_link {
483 my ($src, $dest, $ensure_paths_absolute) = @_;
484
485 # remove trailing slashes from source and destination files
486 $src =~ s/[\\\/]+$//;
487 $dest =~ s/[\\\/]+$//;
488
489 # Ensure file paths are absolute IF requested to do so
490 # Soft_linking didn't work for relative paths
491 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
492 # We need to ensure that the src file is the absolute path
493 # See http://perldoc.perl.org/File/Spec.html
494 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
495 $src = File::Spec->rel2abs($src); # make absolute
496 }
497 # Might as well ensure that the destination file's absolute path is used
498 if(!File::Spec->file_name_is_absolute( $dest )) {
499 $dest = File::Spec->rel2abs($dest); # make absolute
500 }
501 }
502
503 # a few sanity checks
504 if (!-e $src) {
505 print STDERR "util::soft_link source file $src does not exist\n";
506 return 0;
507 }
508
509 my $dest_dir = &File::Basename::dirname($dest);
510 mk_all_dir($dest_dir) if (!-e $dest_dir);
511
512 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
513
514 # symlink not supported on windows
515 &File::Copy::copy ($src, $dest);
516
517 } elsif (!eval {symlink($src, $dest)}) {
518 print STDERR "util::soft_link: unable to create soft link.\n";
519 return 0;
520 }
521
522 return 1;
523}
524
525# Primarily for filenames generated by processing
526# content of HTML files (which are mapped to UTF-8 internally)
527#
528# To turn this into an octet string that really exists on the file
529# system:
530# 1. don't need to do anything special for Unix-based systems
531# (as underlying file system is byte-code)
532# 2. need to map to short DOS filenames for Windows
533
534sub utf8_to_real_filename
535{
536 my ($utf8_filename) = @_;
537
538 my $real_filename;
539
540 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
541 require Win32;
542
543 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
544
545 my $unicode_filename = decode("utf8",$utf8_filename);
546 $real_filename = Win32::GetShortPathName($unicode_filename);
547 }
548 else {
549 $real_filename = $utf8_filename;
550 }
551
552 return $real_filename;
553}
554
555
556sub fd_exists
557{
558 my $filename_full_path = shift @_;
559 my $test_op = shift @_ || "-e";
560
561 # By default tests for existance of file or directory (-e)
562 # Can be made more specific by providing second parameter (e.g. -f or -d)
563
564 my $exists = 0;
565
566 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
567 require Win32;
568 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
569 if (!defined $filename_short_path) {
570 # Was probably still in UTF8 form (not what is needed on Windows)
571 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
572 if (defined $unicode_filename_full_path) {
573 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
574 }
575 }
576 $filename_full_path = $filename_short_path;
577 }
578
579 if (defined $filename_full_path) {
580 $exists = eval "($test_op \$filename_full_path)";
581 }
582
583 return $exists;
584}
585
586sub file_exists
587{
588 my ($filename_full_path) = @_;
589
590 return fd_exists($filename_full_path,"-f");
591}
592
593sub dir_exists
594{
595 my ($filename_full_path) = @_;
596
597 return fd_exists($filename_full_path,"-d");
598}
599
600
601
602# updates a copy of a directory in some other part of the filesystem
603# verbosity settings are: 0=low, 1=normal, 2=high
604# both $fromdir and $todir should be absolute paths
605sub cachedir {
606 my ($fromdir, $todir, $verbosity) = @_;
607 $verbosity = 1 unless defined $verbosity;
608
609 # use / for the directory separator, remove duplicate and
610 # trailing slashes
611 $fromdir=~s/[\\\/]+/\//g;
612 $fromdir=~s/[\\\/]+$//;
613 $todir=~s/[\\\/]+/\//g;
614 $todir=~s/[\\\/]+$//;
615
616 &mk_all_dir ($todir);
617
618 # get the directories in ascending order
619 if (!opendir (FROMDIR, $fromdir)) {
620 print STDERR "util::cachedir could not read directory $fromdir\n";
621 return;
622 }
623 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
624 closedir (FROMDIR);
625
626 if (!opendir (TODIR, $todir)) {
627 print STDERR "util::cacedir could not read directory $todir\n";
628 return;
629 }
630 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
631 closedir (TODIR);
632
633 my $fromi = 0;
634 my $toi = 0;
635
636 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
637# print "fromi: $fromi toi: $toi\n";
638
639 # see if we should delete a file/directory
640 # this should happen if the file/directory
641 # is not in the from list or if its a different
642 # size, or has an older timestamp
643 if ($toi < scalar(@todir)) {
644 if (($fromi >= scalar(@fromdir)) ||
645 ($todir[$toi] lt $fromdir[$fromi] ||
646 ($todir[$toi] eq $fromdir[$fromi] &&
647 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
648 $verbosity)))) {
649
650 # the files are different
651 &rm_r("$todir/$todir[$toi]");
652 splice(@todir, $toi, 1); # $toi stays the same
653
654 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
655 # the files are the same
656 # if it is a directory, check its contents
657 if (-d "$todir/$todir[$toi]") {
658 &cachedir ("$fromdir/$fromdir[$fromi]",
659 "$todir/$todir[$toi]", $verbosity);
660 }
661
662 $toi++;
663 $fromi++;
664 next;
665 }
666 }
667
668 # see if we should insert a file/directory
669 # we should insert a file/directory if there
670 # is no tofiles left or if the tofile does not exist
671 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
672 $todir[$toi] gt $fromdir[$fromi])) {
673 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
674 splice (@todir, $toi, 0, $fromdir[$fromi]);
675
676 $toi++;
677 $fromi++;
678 }
679 }
680}
681
682# this function returns -1 if either file is not found
683# assumes that $file1 and $file2 are absolute file names or
684# in the current directory
685# $file2 is allowed to be newer than $file1
686sub differentfiles {
687 my ($file1, $file2, $verbosity) = @_;
688 $verbosity = 1 unless defined $verbosity;
689
690 $file1 =~ s/\/+$//;
691 $file2 =~ s/\/+$//;
692
693 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
694 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
695
696 return -1 unless (-e $file1 && -e $file2);
697 if ($file1name ne $file2name) {
698 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
699 return 1;
700 }
701
702 my @file1stat = stat ($file1);
703 my @file2stat = stat ($file2);
704
705 if (-d $file1) {
706 if (! -d $file2) {
707 print STDERR "one file is a directory\n" if ($verbosity >= 2);
708 return 1;
709 }
710 return 0;
711 }
712
713 # both must be regular files
714 unless (-f $file1 && -f $file2) {
715 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
716 return 1;
717 }
718
719 # the size of the files must be the same
720 if ($file1stat[7] != $file2stat[7]) {
721 print STDERR "different sized files\n" if ($verbosity >= 2);
722 return 1;
723 }
724
725 # the second file cannot be older than the first
726 if ($file1stat[9] > $file2stat[9]) {
727 print STDERR "file is older\n" if ($verbosity >= 2);
728 return 1;
729 }
730
731 return 0;
732}
733
734
735sub get_tmp_filename
736{
737 my $file_ext = shift(@_) || undef;
738
739 my $opt_dot_file_ext = "";
740 if (defined $file_ext) {
741 if ($file_ext !~ m/\./) {
742 # no dot, so needs one added in at start
743 $opt_dot_file_ext = ".$file_ext"
744 }
745 else {
746 # allow for "extensions" such as _metadata.txt to be handled
747 # gracefully
748 $opt_dot_file_ext = $file_ext;
749 }
750 }
751
752 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
753 &mk_all_dir ($tmpdir) unless -e $tmpdir;
754
755 my $count = 1000;
756 my $rand = int(rand $count);
757 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
758
759 while (-e $full_tmp_filename) {
760 $rand = int(rand $count);
761 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
762 $count++;
763 }
764
765 return $full_tmp_filename;
766}
767
768sub get_timestamped_tmp_folder
769{
770
771 my $tmp_dirname;
772 if(defined $ENV{'GSDLCOLLECTDIR'}) {
773 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
774 } elsif(defined $ENV{'GSDLHOME'}) {
775 $tmp_dirname = $ENV{'GSDLHOME'};
776 } else {
777 return undef;
778 }
779
780 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
781 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
782
783 # add the timestamp into the path otherwise we can run into problems
784 # if documents have the same name
785 my $timestamp = time;
786 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
787 $tmp_dirname = $time_tmp_dirname;
788 my $i = 1;
789 while (-e $tmp_dirname) {
790 $tmp_dirname = "$time_tmp_dirname$i";
791 $i++;
792 }
793 &util::mk_dir($tmp_dirname);
794
795 return $tmp_dirname;
796}
797
798sub get_timestamped_tmp_filename_in_collection
799{
800
801 my ($input_filename, $output_ext) = @_;
802 # derive tmp filename from input filename
803 my ($tailname, $dirname, $suffix)
804 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
805
806 # softlink to collection tmp dir
807 my $tmp_dirname = &util::get_timestamped_tmp_folder();
808 $tmp_dirname = $dirname unless defined $tmp_dirname;
809
810 # following two steps copied from ConvertBinaryFile
811 # do we need them?? can't use them as is, as they use plugin methods.
812
813 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
814
815 # URLEncode this since htmls with images where the html filename is utf8 don't seem
816 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
817 # files on the filesystem.
818 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
819 if (defined $output_ext) {
820 $output_ext = ".$output_ext"; # add the dot
821 } else {
822 $output_ext = $suffix;
823 }
824 $output_ext= lc($output_ext);
825 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
826
827 return $tmp_filename;
828}
829
830sub get_toplevel_tmp_dir
831{
832 return filename_cat($ENV{'GSDLHOME'}, "tmp");
833}
834
835
836sub filename_to_regex {
837 my $filename = shift (@_);
838
839 # need to put single backslash back to double so that regex works
840 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
841 $filename =~ s/\\/\\\\/g;
842 }
843 return $filename;
844}
845
846sub filename_cat {
847 my $first_file = shift(@_);
848 my (@filenames) = @_;
849
850# Useful for debugging
851# -- might make sense to call caller(0) rather than (1)??
852# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
853# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
854
855 # If first_file is not null or empty, then add it back into the list
856 if (defined $first_file && $first_file =~ /\S/) {
857 unshift(@filenames, $first_file);
858 }
859
860 my $filename = join("/", @filenames);
861
862 # remove duplicate slashes and remove the last slash
863 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
864 $filename =~ s/[\\\/]+/\\/g;
865 } else {
866 $filename =~ s/[\/]+/\//g;
867 # DB: want a filename abc\de.html to remain like this
868 }
869 $filename =~ s/[\\\/]$//;
870
871 return $filename;
872}
873
874
875sub pathname_cat {
876 my $first_path = shift(@_);
877 my (@pathnames) = @_;
878
879 # If first_path is not null or empty, then add it back into the list
880 if (defined $first_path && $first_path =~ /\S/) {
881 unshift(@pathnames, $first_path);
882 }
883
884 my $join_char;
885 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
886 $join_char = ";";
887 } else {
888 $join_char = ":";
889 }
890
891 my $pathname = join($join_char, @pathnames);
892
893 # remove duplicate slashes
894 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
895 $pathname =~ s/[\\\/]+/\\/g;
896 } else {
897 $pathname =~ s/[\/]+/\//g;
898 # DB: want a pathname abc\de.html to remain like this
899 }
900
901 return $pathname;
902}
903
904
905sub tidy_up_oid {
906 my ($OID) = @_;
907 if ($OID =~ /\./) {
908 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
909 $OID =~ s/\.//g; #remove any periods
910 }
911 if ($OID =~ /^\s.*\s$/) {
912 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
913 # remove starting and trailing whitespace
914 $OID =~ s/^\s+//;
915 $OID =~ s/\s+$//;
916 }
917 if ($OID =~ /^[\d]*$/) {
918 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
919 $OID = "D" . $OID;
920 }
921
922 return $OID;
923}
924sub envvar_prepend {
925 my ($var,$val) = @_;
926
927 # do not prepend any value/path that's already in the environment variable
928 if ($ENV{'GSDLOS'} =~ /^windows$/i)
929 {
930 my $escaped_val = $val;
931 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
932 if (!defined($ENV{$var})) {
933 $ENV{$var} = "$val";
934 }
935 elsif($ENV{$var} !~ m/$escaped_val/) {
936 $ENV{$var} = "$val;".$ENV{$var};
937 }
938 }
939 else {
940 if (!defined($ENV{$var})) {
941 $ENV{$var} = "$val";
942 }
943 elsif($ENV{$var} !~ m/$val/) {
944 $ENV{$var} = "$val:".$ENV{$var};
945 }
946 }
947}
948
949sub envvar_append {
950 my ($var,$val) = @_;
951
952 # do not append any value/path that's already in the environment variable
953 if ($ENV{'GSDLOS'} =~ /^windows$/i)
954 {
955 my $escaped_val = $val;
956 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
957 if (!defined($ENV{$var})) {
958 $ENV{$var} = "$val";
959 }
960 elsif($ENV{$var} !~ m/$escaped_val/) {
961 $ENV{$var} .= ";$val";
962 }
963 }
964 else {
965 if (!defined($ENV{$var})) {
966 $ENV{$var} = "$val";
967 }
968 elsif($ENV{$var} !~ m/$val/) {
969 $ENV{$var} .= ":$val";
970 }
971 }
972}
973
974
975# splits a filename into a prefix and a tail extension using the tail_re, or
976# if that fails, splits on the file_extension . (dot)
977sub get_prefix_and_tail_by_regex {
978
979 my ($filename,$tail_re) = @_;
980
981 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
982 if ((!defined $file_prefix) || (!defined $file_ext)) {
983 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
984 }
985
986 return ($file_prefix,$file_ext);
987}
988
989# get full path and file only path from a base_dir (which may be empty) and
990# file (which may contain directories)
991sub get_full_filenames {
992 my ($base_dir, $file) = @_;
993
994 my $filename_full_path = $file;
995 # add on directory if present
996 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
997
998 my $filename_no_path = $file;
999
1000 # remove directory if present
1001 $filename_no_path =~ s/^.*[\/\\]//;
1002 return ($filename_full_path, $filename_no_path);
1003}
1004
1005# returns the path of a file without the filename -- ie. the directory the file is in
1006sub filename_head {
1007 my $filename = shift(@_);
1008
1009 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1010 $filename =~ s/[^\\\\]*$//;
1011 }
1012 else {
1013 $filename =~ s/[^\\\/]*$//;
1014 }
1015
1016 return $filename;
1017}
1018
1019
1020
1021# returns 1 if filename1 and filename2 point to the same
1022# file or directory
1023sub filenames_equal {
1024 my ($filename1, $filename2) = @_;
1025
1026 # use filename_cat to clean up trailing slashes and
1027 # multiple slashes
1028 $filename1 = filename_cat ($filename1);
1029 $filename2 = filename_cat ($filename2);
1030
1031 # filenames not case sensitive on windows
1032 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1033 $filename1 =~ tr/[A-Z]/[a-z]/;
1034 $filename2 =~ tr/[A-Z]/[a-z]/;
1035 }
1036 return 1 if $filename1 eq $filename2;
1037 return 0;
1038}
1039
1040
1041sub filename_within_directory
1042{
1043 my ($filename,$within_dir) = @_;
1044
1045 if ($within_dir !~ m/[\/\\]$/) {
1046 my $dirsep = &util::get_dirsep();
1047 $within_dir .= $dirsep;
1048 }
1049
1050 $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator
1051
1052 if ($filename =~ m/^$within_dir(.*)$/) {
1053 $filename = $1;
1054 }
1055
1056 return $filename;
1057}
1058
1059sub filename_within_collection
1060{
1061 my ($filename) = @_;
1062
1063 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1064
1065 if (defined $collect_dir) {
1066
1067 # if from within GSDLCOLLECTDIR, then remove directory prefix
1068 # so source_filename is realative to it. This is done to aid
1069 # portability, i.e. the collection can be moved to somewhere
1070 # else on the file system and the archives directory will still
1071 # work. This is needed, for example in the applet version of
1072 # GLI where GSDLHOME/collect on the server will be different to
1073 # the collect directory of the remove user. Of course,
1074 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1075 # it back into a full pathname.
1076
1077 $filename = filename_within_directory($filename,$collect_dir);
1078 }
1079
1080 return $filename;
1081}
1082
1083sub prettyprint_file
1084{
1085 my ($base_dir,$file,$gli) = @_;
1086
1087 my $filename_full_path = &util::filename_cat($base_dir,$file);
1088
1089 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1090 require Win32;
1091
1092 # For some reason base_dir in the form c:/a/b/c
1093 # This leads to confusion later on, so turn it back into
1094 # the more usual Windows form
1095 $base_dir =~ s/\//\\/g;
1096 my $long_base_dir = Win32::GetLongPathName($base_dir);
1097 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1098
1099 $file = filename_within_directory($long_full_path,$long_base_dir);
1100 $file = encode("utf8",$file) if ($gli);
1101 }
1102
1103 return $file;
1104}
1105
1106
1107sub upgrade_if_dos_filename
1108{
1109 my ($filename_full_path,$and_encode) = @_;
1110
1111 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1112 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1113 # to its long (Windows) version
1114 my $long_filename = Win32::GetLongPathName($filename_full_path);
1115 if (defined $long_filename) {
1116 $filename_full_path = $long_filename;
1117 }
1118 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1119 $filename_full_path =~ s/^(.):/\u$1:/;
1120 if ((defined $and_encode) && ($and_encode)) {
1121 $filename_full_path = encode("utf8",$filename_full_path);
1122 }
1123 }
1124
1125 return $filename_full_path;
1126}
1127
1128
1129sub downgrade_if_dos_filename
1130{
1131 my ($filename_full_path) = @_;
1132
1133 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1134 require Win32;
1135
1136 # Ensure the given long Windows filename is in a form that can
1137 # be opened by Perl => convert it to a short DOS-like filename
1138
1139 my $short_filename = Win32::GetShortPathName($filename_full_path);
1140 if (defined $short_filename) {
1141 $filename_full_path = $short_filename;
1142 }
1143 # Make sure initial drive letter is lower-case (to fit in
1144 # with rest of Greenstone)
1145 $filename_full_path =~ s/^(.):/\u$1:/;
1146 }
1147
1148 return $filename_full_path;
1149}
1150
1151sub block_filename
1152{
1153 my ($block_hash,$filename) = @_;
1154
1155 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1156
1157 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1158 my $lower_filename = lc($filename);
1159 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1160# my $lower_drive = $filename;
1161# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1162
1163# my $upper_drive = $filename;
1164# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1165#
1166# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1167# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
1168 }
1169 else {
1170 $block_hash->{'file_blocks'}->{$filename} = 1;
1171 }
1172}
1173
1174
1175sub filename_is_absolute
1176{
1177 my ($filename) = @_;
1178
1179 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1180 return ($filename =~ m/^(\w:)?\\/);
1181 }
1182 else {
1183 return ($filename =~ m/^\//);
1184 }
1185}
1186
1187
1188## @method make_absolute()
1189#
1190# Ensure the given file path is absolute in respect to the given base path.
1191#
1192# @param $base_dir A string denoting the base path the given dir must be
1193# absolute to.
1194# @param $dir The directory to be made absolute as a string. Note that the
1195# dir may already be absolute, in which case it will remain
1196# unchanged.
1197# @return The now absolute form of the directory as a string.
1198#
1199# @author John Thompson, DL Consulting Ltd.
1200# @copy 2006 DL Consulting Ltd.
1201#
1202#used in buildcol.pl, doesn't work for all cases --kjdon
1203sub make_absolute {
1204
1205 my ($base_dir, $dir) = @_;
1206### print STDERR "dir = $dir\n";
1207 $dir =~ s/[\\\/]+/\//g;
1208 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1209 $dir =~ s|^/tmp_mnt||;
1210 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1211 $dir =~ s|/[.][.]?/|/|g;
1212 $dir =~ tr|/|/|s;
1213### print STDERR "dir = $dir\n";
1214
1215 return $dir;
1216}
1217## make_absolute() ##
1218
1219sub get_dirsep {
1220
1221 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1222 return "\\";
1223 } else {
1224 return "\/";
1225 }
1226}
1227
1228sub get_os_dirsep {
1229
1230 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1231 return "\\\\";
1232 } else {
1233 return "\\\/";
1234 }
1235}
1236
1237sub get_re_dirsep {
1238
1239 return "\\\\|\\\/";
1240}
1241
1242
1243sub get_dirsep_tail {
1244 my ($filename) = @_;
1245
1246 # returns last part of directory or filename
1247 # On unix e.g. a/b.d => b.d
1248 # a/b/c => c
1249
1250 my $dirsep = get_re_dirsep();
1251 my @dirs = split (/$dirsep/, $filename);
1252 my $tail = pop @dirs;
1253
1254 # - caused problems under windows
1255 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1256
1257 return $tail;
1258}
1259
1260
1261# if this is running on windows we want binaries to end in
1262# .exe, otherwise they don't have to end in any extension
1263sub get_os_exe {
1264 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1265 return "";
1266}
1267
1268
1269# test to see whether this is a big or little endian machine
1270sub is_little_endian
1271{
1272 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1273 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1274 # Otherwise, it's little endian
1275
1276 #return 0 if $^O =~ /^darwin$/i;
1277 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1278
1279 # Going back to stating exactly whether the machine is little endian
1280 # or big endian, without any special case for Macs. Since for rata it comes
1281 # back with little endian and for shuttle with bigendian.
1282 return (ord(substr(pack("s",1), 0, 1)) == 1);
1283}
1284
1285
1286# will return the collection name if successful, "" otherwise
1287sub use_collection {
1288 my ($collection, $collectdir) = @_;
1289
1290 if (!defined $collectdir || $collectdir eq "") {
1291 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1292 }
1293
1294 # get and check the collection
1295 if (!defined($collection) || $collection eq "") {
1296 if (defined $ENV{'GSDLCOLLECTION'}) {
1297 $collection = $ENV{'GSDLCOLLECTION'};
1298 } else {
1299 print STDOUT "No collection specified\n";
1300 return "";
1301 }
1302 }
1303
1304 if ($collection eq "modelcol") {
1305 print STDOUT "You can't use modelcol.\n";
1306 return "";
1307 }
1308
1309 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1310 # are defined
1311 $ENV{'GSDLCOLLECTION'} = $collection;
1312 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1313
1314 # make sure this collection exists
1315 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1316 print STDOUT "Invalid collection ($collection).\n";
1317 return "";
1318 }
1319
1320 # everything is ready to go
1321 return $collection;
1322}
1323
1324sub get_current_collection_name {
1325 return $ENV{'GSDLCOLLECTION'};
1326}
1327
1328
1329# will return the collection name if successful, "" otherwise.
1330# Like use_collection (above) but for greenstone 3 (taking account of site level)
1331
1332sub use_site_collection {
1333 my ($site, $collection, $collectdir) = @_;
1334
1335 if (!defined $collectdir || $collectdir eq "") {
1336 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1337 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1338 }
1339
1340 # collectdir explicitly set by this point (using $site variable if required).
1341 # Can call "old" gsdl2 use_collection now.
1342
1343 return use_collection($collection,$collectdir);
1344}
1345
1346
1347
1348sub locate_config_file
1349{
1350 my ($file) = @_;
1351
1352 my $locations = locate_config_files($file);
1353
1354 return shift @$locations; # returns undef if 'locations' is empty
1355}
1356
1357
1358sub locate_config_files
1359{
1360 my ($file) = @_;
1361
1362 my @locations = ();
1363
1364 if (-e $file) {
1365 # Clearly specified (most likely full filename)
1366 # No need to hunt in 'etc' directories, return value unchanged
1367 push(@locations,$file);
1368 }
1369 else {
1370 # Check for collection specific one before looking in global GSDL 'etc'
1371 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1372 my $test_collect_etc_filename
1373 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1374
1375 if (-e $test_collect_etc_filename) {
1376 push(@locations,$test_collect_etc_filename);
1377 }
1378 }
1379 my $test_main_etc_filename
1380 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1381 if (-e $test_main_etc_filename) {
1382 push(@locations,$test_main_etc_filename);
1383 }
1384 }
1385
1386 return \@locations;
1387}
1388
1389
1390sub hyperlink_text
1391{
1392 my ($text) = @_;
1393
1394 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1395 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1396
1397 return $text;
1398}
1399
1400
1401# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1402# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1403sub is_dir_empty
1404{
1405 my ($path) = @_;
1406 opendir DIR, $path;
1407 while(my $entry = readdir DIR) {
1408 next if($entry =~ /^\.\.?$/);
1409 closedir DIR;
1410 return 0;
1411 }
1412 closedir DIR;
1413 return 1;
1414}
1415
1416# Returns the given filename converted using either URL encoding or base64
1417# encoding, as specified by $rename_method. If the given filename has no suffix
1418# (if it is just the tailname), then $no_suffix should be some defined value.
1419# rename_method can be url, none, base64
1420sub rename_file {
1421 my ($filename, $rename_method, $no_suffix) = @_;
1422
1423 if(!$filename) { # undefined or empty string
1424 return $filename;
1425 }
1426
1427 if (!$rename_method) {
1428 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1429 # Debugging information
1430 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1431 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1432 $rename_method = "url";
1433 } elsif($rename_method eq "none") {
1434 return $filename; # would have already been renamed
1435 }
1436
1437 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1438 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1439 ###$filename =~ s/ /_/g;
1440
1441 my ($tailname,$dirname,$suffix);
1442 if($no_suffix) { # given a tailname, no suffix
1443 ($tailname,$dirname) = File::Basename::fileparse($filename);
1444 }
1445 else {
1446 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1447 }
1448 if (!$suffix) {
1449 $suffix = "";
1450 }
1451 else {
1452 $suffix = lc($suffix);
1453 }
1454
1455 if ($rename_method eq "url") {
1456 $tailname = &unicode::url_encode($tailname);
1457 }
1458 elsif ($rename_method eq "base64") {
1459 $tailname = &unicode::base64_encode($tailname);
1460 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1461 }
1462
1463 $filename = "$tailname$suffix";
1464 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1465
1466 return $filename;
1467}
1468
1469
1470# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1471sub rename_ldb_or_bdb_file {
1472 my ($filename_no_ext) = @_;
1473
1474 my $new_filename = "$filename_no_ext.gdb";
1475 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1476 # try ldb
1477 my $old_filename = "$filename_no_ext.ldb";
1478
1479 if (-f $old_filename) {
1480 print STDERR "Renaming $old_filename to $new_filename\n";
1481 rename ($old_filename, $new_filename)
1482 || print STDERR "Rename failed: $!\n";
1483 return;
1484 }
1485 # try bdb
1486 $old_filename = "$filename_no_ext.bdb";
1487 if (-f $old_filename) {
1488 print STDERR "Renaming $old_filename to $new_filename\n";
1489 rename ($old_filename, $new_filename)
1490 || print STDERR "Rename failed: $!\n";
1491 return;
1492 }
1493}
1494
1495
1496# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1497# By default, /greenstone3 for GS3 or /greenstone for GS2.
1498sub get_greenstone_url_prefix() {
1499 # if already set on a previous occasion, just return that
1500 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1501 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1502
1503 my ($configfile, $urlprefix, $defaultUrlprefix);
1504 my @propertynames = ();
1505
1506 if($ENV{'GSDL3SRCHOME'}) {
1507 $defaultUrlprefix = "/greenstone3";
1508 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1509 push(@propertynames, qw/path\s*\=/);
1510 } else {
1511 $defaultUrlprefix = "/greenstone";
1512 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1513 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1514 }
1515
1516 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1517
1518 if(!$urlprefix) { # no values found for URL prefix, use default values
1519 $urlprefix = $defaultUrlprefix;
1520 } else {
1521 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1522 $urlprefix =~ s/^\///; # remove the starting slash
1523 my @dirs = split(/(\\|\/)/, $urlprefix);
1524 $urlprefix = shift(@dirs);
1525
1526 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1527 $urlprefix = "/$urlprefix";
1528 }
1529 }
1530
1531 # set for the future
1532 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1533# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1534 return $urlprefix;
1535}
1536
1537
1538# Given a config file (xml or java properties file) and a list/array of regular expressions
1539# that represent property names to match on, this function will return the value for the 1st
1540# matching property name. If the return value is undefined, no matching property was found.
1541sub extract_propvalue_from_file() {
1542 my ($configfile, $propertynames) = @_;
1543
1544 my $value;
1545 unless(open(FIN, "<$configfile")) {
1546 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1547 return $value; # not initialised
1548 }
1549
1550 # Read the entire file at once, as one single line, then close it
1551 my $filecontents;
1552 {
1553 local $/ = undef;
1554 $filecontents = <FIN>;
1555 }
1556 close(FIN);
1557
1558 foreach my $regex (@$propertynames) {
1559 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1560 if($value) {
1561 $value =~ s/^\"//; # remove any startquotes
1562 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1563 last; # found value for a matching property, break from loop
1564 }
1565 }
1566
1567 return $value;
1568}
1569
1570# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1571# given that perllib is in @INC in order to invoke this subroutine.
1572# Call as follows -- after setting up INC to include perllib and
1573# after setting up GSDLHOME and GSDLOS:
1574#
1575# require util;
1576# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1577#
1578sub setup_greenstone_env() {
1579 my ($GSDLHOME, $GSDLOS) = @_;
1580
1581 #my %env_map = ();
1582 # Get the localised ENV settings of running a localised source setup.bash
1583 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1584 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1585 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
1586 if($GSDLOS =~ m/windows/i) {
1587 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1588 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1589 }
1590 if (!open(PIN, "$perl_command |")) {
1591 print STDERR ("Unable to execute command: $perl_command. $!\n");
1592 }
1593
1594 while (defined (my $perl_output_line = <PIN>)) {
1595 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1596 #$env_map{$key}=$value;
1597 $ENV{$key}=$value;
1598 }
1599 close (PIN);
1600
1601 # If any keys in $ENV don't occur in Greenstone's localised env
1602 # (stored in $env_map), delete those entries from $ENV
1603 #foreach $key (keys %ENV) {
1604 # if(!defined $env_map{$key}) {
1605 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1606 # delete $ENV{$key}; # del $ENV(key, value) pair
1607 # }
1608 #}
1609 #undef %env_map;
1610}
1611
1612sub get_perl_exec() {
1613 my $perl_exec = $^X; # may return just "perl"
1614
1615 if($ENV{'PERLPATH'}) {
1616 # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1617 if($ENV{'GSDLOS'} =~ m/windows/) {
1618 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1619 } else {
1620 $perl_exec = "$ENV{'PERLPATH'}/perl";
1621 }
1622 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1623 # containing the full path to the current perl executable we're using
1624 $perl_exec = $Config{perlpath}; # configured path for perl
1625 if (!-e $perl_exec) { # may not point to location on this machine
1626 $perl_exec = $^X; # may return just "perl"
1627 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1628 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1629 }
1630 }
1631 }
1632
1633 return $perl_exec;
1634}
1635
1636
16371;
Note: See TracBrowser for help on using the repository browser.