source: main/trunk/greenstone2/perllib/util.pm@ 23306

Last change on this file since 23306 was 23306, checked in by sjm84, 13 years ago

setup_greenstone_evn sub to help set up the environment if GSDLHOME and GSDLOS are already known. This perl subrouting will call setup.bash/bat and manually set the environment vars that setup would normally set. May need expanding if we want to restrict things to only those environment vars that the Greenstone setup script sets.

  • Property svn:keywords set to Author Date Id Revision
File size: 38.2 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use File::Copy;
29use File::Basename;
30
31use strict;
32
33
34# removes files (but not directories)
35sub rm {
36 my (@files) = @_;
37
38 my @filefiles = ();
39
40 # make sure the files we want to delete exist
41 # and are regular files
42 foreach my $file (@files) {
43 if (!-e $file) {
44 print STDERR "util::rm $file does not exist\n";
45 } elsif ((!-f $file) && (!-l $file)) {
46 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47 } else {
48 push (@filefiles, $file);
49 }
50 }
51
52 # remove the files
53 my $numremoved = unlink @filefiles;
54
55 # check to make sure all of them were removed
56 if ($numremoved != scalar(@filefiles)) {
57 print STDERR "util::rm Not all files were removed\n";
58 }
59}
60
61# removes files (but not directories) - can rename this to the default
62# "rm" subroutine when debugging the deletion of individual files.
63sub rm_debug {
64 my (@files) = @_;
65 my @filefiles = ();
66
67 # make sure the files we want to delete exist
68 # and are regular files
69 foreach my $file (@files) {
70 if (!-e $file) {
71 print STDERR "util::rm $file does not exist\n";
72 } elsif ((!-f $file) && (!-l $file)) {
73 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
74 } else { # debug message
75 unlink($file) or warn "Could not delete file $file: $!\n";
76 }
77 }
78}
79
80
81# recursive removal
82sub filtered_rm_r {
83 my ($files,$file_accept_re,$file_reject_re) = @_;
84
85 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
86
87 # recursively remove the files
88 foreach my $file (@files_array) {
89 $file =~ s/[\/\\]+$//; # remove trailing slashes
90
91 if (!-e $file) {
92 print STDERR "util::filtered_rm_r $file does not exist\n";
93
94 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
95 # get the contents of this directory
96 if (!opendir (INDIR, $file)) {
97 print STDERR "util::filtered_rm_r could not open directory $file\n";
98 } else {
99 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
100 closedir (INDIR);
101
102 # remove all the files in this directory
103 map {$_="$file/$_";} @filedir;
104 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
105
106 if (!defined $file_accept_re && !defined $file_reject_re) {
107 # remove this directory
108 if (!rmdir $file) {
109 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
110 }
111 }
112 }
113 } else {
114 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
115
116 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
117 # remove this file
118 &rm ($file);
119 }
120 }
121 }
122}
123
124
125# recursive removal
126sub rm_r {
127 my (@files) = @_;
128
129 # use the more general (but reterospectively written function
130 # filtered_rm_r function()
131
132 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
133}
134
135
136
137
138# moves a file or a group of files
139sub mv {
140 my $dest = pop (@_);
141 my (@srcfiles) = @_;
142
143 # remove trailing slashes from source and destination files
144 $dest =~ s/[\\\/]+$//;
145 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
146
147 # a few sanity checks
148 if (scalar (@srcfiles) == 0) {
149 print STDERR "util::mv no destination directory given\n";
150 return;
151 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
152 print STDERR "util::mv if multiple source files are given the ".
153 "destination must be a directory\n";
154 return;
155 }
156
157 # move the files
158 foreach my $file (@srcfiles) {
159 my $tempdest = $dest;
160 if (-d $tempdest) {
161 my ($filename) = $file =~ /([^\\\/]+)$/;
162 $tempdest .= "/$filename";
163 }
164 if (!-e $file) {
165 print STDERR "util::mv $file does not exist\n";
166 } else {
167 rename ($file, $tempdest);
168 }
169 }
170}
171
172
173# copies a file or a group of files
174sub cp {
175 my $dest = pop (@_);
176 my (@srcfiles) = @_;
177
178 # remove trailing slashes from source and destination files
179 $dest =~ s/[\\\/]+$//;
180 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
181
182 # a few sanity checks
183 if (scalar (@srcfiles) == 0) {
184 print STDERR "util::cp no destination directory given\n";
185 return;
186 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
187 print STDERR "util::cp if multiple source files are given the ".
188 "destination must be a directory\n";
189 return;
190 }
191
192 # copy the files
193 foreach my $file (@srcfiles) {
194 my $tempdest = $dest;
195 if (-d $tempdest) {
196 my ($filename) = $file =~ /([^\\\/]+)$/;
197 $tempdest .= "/$filename";
198 }
199 if (!-e $file) {
200 print STDERR "util::cp $file does not exist\n";
201 } elsif (!-f $file) {
202 print STDERR "util::cp $file is not a plain file\n";
203 } else {
204 &File::Copy::copy ($file, $tempdest);
205 }
206 }
207}
208
209
210
211# recursively copies a file or group of files
212# syntax: cp_r (sourcefiles, destination directory)
213# destination must be a directory - to copy one file to
214# another use cp instead
215sub cp_r {
216 my $dest = pop (@_);
217 my (@srcfiles) = @_;
218
219 # a few sanity checks
220 if (scalar (@srcfiles) == 0) {
221 print STDERR "util::cp_r no destination directory given\n";
222 return;
223 } elsif (-f $dest) {
224 print STDERR "util::cp_r destination must be a directory\n";
225 return;
226 }
227
228 # create destination directory if it doesn't exist already
229 if (! -d $dest) {
230 my $store_umask = umask(0002);
231 mkdir ($dest, 0777);
232 umask($store_umask);
233 }
234
235 # copy the files
236 foreach my $file (@srcfiles) {
237
238 if (!-e $file) {
239 print STDERR "util::cp_r $file does not exist\n";
240
241 } elsif (-d $file) {
242 # make the new directory
243 my ($filename) = $file =~ /([^\\\/]*)$/;
244 $dest = &util::filename_cat ($dest, $filename);
245 my $store_umask = umask(0002);
246 mkdir ($dest, 0777);
247 umask($store_umask);
248
249 # get the contents of this directory
250 if (!opendir (INDIR, $file)) {
251 print STDERR "util::cp_r could not open directory $file\n";
252 } else {
253 my @filedir = readdir (INDIR);
254 closedir (INDIR);
255 foreach my $f (@filedir) {
256 next if $f =~ /^\.\.?$/;
257 # copy all the files in this directory
258 my $ff = &util::filename_cat ($file, $f);
259 &cp_r ($ff, $dest);
260 }
261 }
262
263 } else {
264 &cp($file, $dest);
265 }
266 }
267}
268# recursively copies a file or group of files
269# syntax: cp_r (sourcefiles, destination directory)
270# destination must be a directory - to copy one file to
271# another use cp instead
272sub cp_r_nosvn {
273 my $dest = pop (@_);
274 my (@srcfiles) = @_;
275
276 # a few sanity checks
277 if (scalar (@srcfiles) == 0) {
278 print STDERR "util::cp_r no destination directory given\n";
279 return;
280 } elsif (-f $dest) {
281 print STDERR "util::cp_r destination must be a directory\n";
282 return;
283 }
284
285 # create destination directory if it doesn't exist already
286 if (! -d $dest) {
287 my $store_umask = umask(0002);
288 mkdir ($dest, 0777);
289 umask($store_umask);
290 }
291
292 # copy the files
293 foreach my $file (@srcfiles) {
294
295 if (!-e $file) {
296 print STDERR "util::cp_r $file does not exist\n";
297
298 } elsif (-d $file) {
299 # make the new directory
300 my ($filename) = $file =~ /([^\\\/]*)$/;
301 $dest = &util::filename_cat ($dest, $filename);
302 my $store_umask = umask(0002);
303 mkdir ($dest, 0777);
304 umask($store_umask);
305
306 # get the contents of this directory
307 if (!opendir (INDIR, $file)) {
308 print STDERR "util::cp_r could not open directory $file\n";
309 } else {
310 my @filedir = readdir (INDIR);
311 closedir (INDIR);
312 foreach my $f (@filedir) {
313 next if $f =~ /^\.\.?$/;
314 next if $f =~ /^\.svn$/;
315 # copy all the files in this directory
316 my $ff = &util::filename_cat ($file, $f);
317 &cp_r ($ff, $dest);
318 }
319 }
320
321 } else {
322 &cp($file, $dest);
323 }
324 }
325}
326
327# copies a directory and its contents, excluding subdirectories, into a new directory
328sub cp_r_toplevel {
329 my $dest = pop (@_);
330 my (@srcfiles) = @_;
331
332 # a few sanity checks
333 if (scalar (@srcfiles) == 0) {
334 print STDERR "util::cp_r no destination directory given\n";
335 return;
336 } elsif (-f $dest) {
337 print STDERR "util::cp_r destination must be a directory\n";
338 return;
339 }
340
341 # create destination directory if it doesn't exist already
342 if (! -d $dest) {
343 my $store_umask = umask(0002);
344 mkdir ($dest, 0777);
345 umask($store_umask);
346 }
347
348 # copy the files
349 foreach my $file (@srcfiles) {
350
351 if (!-e $file) {
352 print STDERR "util::cp_r $file does not exist\n";
353
354 } elsif (-d $file) {
355 # make the new directory
356 my ($filename) = $file =~ /([^\\\/]*)$/;
357 $dest = &util::filename_cat ($dest, $filename);
358 my $store_umask = umask(0002);
359 mkdir ($dest, 0777);
360 umask($store_umask);
361
362 # get the contents of this directory
363 if (!opendir (INDIR, $file)) {
364 print STDERR "util::cp_r could not open directory $file\n";
365 } else {
366 my @filedir = readdir (INDIR);
367 closedir (INDIR);
368 foreach my $f (@filedir) {
369 next if $f =~ /^\.\.?$/;
370
371 # copy all the files in this directory, but not directories
372 my $ff = &util::filename_cat ($file, $f);
373 if (-f $ff) {
374 &cp($ff, $dest);
375 #&cp_r ($ff, $dest);
376 }
377 }
378 }
379
380 } else {
381 &cp($file, $dest);
382 }
383 }
384}
385
386sub mk_dir {
387 my ($dir) = @_;
388
389 my $store_umask = umask(0002);
390 my $mkdir_ok = mkdir ($dir, 0777);
391 umask($store_umask);
392
393 if (!$mkdir_ok)
394 {
395 print STDERR "util::mk_dir could not create directory $dir\n";
396 return;
397 }
398}
399
400# in case anyone cares - I did some testing (using perls Benchmark module)
401# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
402# slightly faster (surprisingly) - Stefan.
403sub mk_all_dir {
404 my ($dir) = @_;
405
406 # use / for the directory separator, remove duplicate and
407 # trailing slashes
408 $dir=~s/[\\\/]+/\//g;
409 $dir=~s/[\\\/]+$//;
410
411 # make sure the cache directory exists
412 my $dirsofar = "";
413 my $first = 1;
414 foreach my $dirname (split ("/", $dir)) {
415 $dirsofar .= "/" unless $first;
416 $first = 0;
417
418 $dirsofar .= $dirname;
419
420 next if $dirname =~ /^(|[a-z]:)$/i;
421 if (!-e $dirsofar)
422 {
423 my $store_umask = umask(0002);
424 my $mkdir_ok = mkdir ($dirsofar, 0777);
425 umask($store_umask);
426 if (!$mkdir_ok)
427 {
428 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
429 return;
430 }
431 }
432 }
433}
434
435# make hard link to file if supported by OS, otherwise copy the file
436sub hard_link {
437 my ($src, $dest, $verbosity) = @_;
438
439 # remove trailing slashes from source and destination files
440 $src =~ s/[\\\/]+$//;
441 $dest =~ s/[\\\/]+$//;
442
443 # a few sanity checks
444 if (-e $dest) {
445 # destination file already exists
446 return;
447 }
448 elsif (!-e $src) {
449 print STDERR "util::hard_link source file $src does not exist\n";
450 return 1;
451 }
452 elsif (-d $src) {
453 print STDERR "util::hard_link source $src is a directory\n";
454 return 1;
455 }
456
457 my $dest_dir = &File::Basename::dirname($dest);
458 mk_all_dir($dest_dir) if (!-e $dest_dir);
459
460
461 if (!link($src, $dest)) {
462 if ((!defined $verbosity) || ($verbosity>2)) {
463 print STDERR "util::hard_link: unable to create hard link. ";
464 print STDERR " Copying file: $src -> $dest\n";
465 }
466 &File::Copy::copy ($src, $dest);
467 }
468 return 0;
469}
470
471# make soft link to file if supported by OS, otherwise copy file
472sub soft_link {
473 my ($src, $dest, $ensure_paths_absolute) = @_;
474
475 # remove trailing slashes from source and destination files
476 $src =~ s/[\\\/]+$//;
477 $dest =~ s/[\\\/]+$//;
478
479 # Ensure file paths are absolute IF requested to do so
480 # Soft_linking didn't work for relative paths
481 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
482 # We need to ensure that the src file is the absolute path
483 # See http://perldoc.perl.org/File/Spec.html
484 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
485 $src = File::Spec->rel2abs($src); # make absolute
486 }
487 # Might as well ensure that the destination file's absolute path is used
488 if(!File::Spec->file_name_is_absolute( $dest )) {
489 $dest = File::Spec->rel2abs($dest); # make absolute
490 }
491 }
492
493 # a few sanity checks
494 if (!-e $src) {
495 print STDERR "util::soft_link source file $src does not exist\n";
496 return 0;
497 }
498
499 my $dest_dir = &File::Basename::dirname($dest);
500 mk_all_dir($dest_dir) if (!-e $dest_dir);
501
502 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
503 # symlink not supported on windows
504 &File::Copy::copy ($src, $dest);
505
506 } elsif (!eval {symlink($src, $dest)}) {
507 print STDERR "util::soft_link: unable to create soft link.\n";
508 return 0;
509 }
510
511 return 1;
512}
513
514
515
516
517# updates a copy of a directory in some other part of the filesystem
518# verbosity settings are: 0=low, 1=normal, 2=high
519# both $fromdir and $todir should be absolute paths
520sub cachedir {
521 my ($fromdir, $todir, $verbosity) = @_;
522 $verbosity = 1 unless defined $verbosity;
523
524 # use / for the directory separator, remove duplicate and
525 # trailing slashes
526 $fromdir=~s/[\\\/]+/\//g;
527 $fromdir=~s/[\\\/]+$//;
528 $todir=~s/[\\\/]+/\//g;
529 $todir=~s/[\\\/]+$//;
530
531 &mk_all_dir ($todir);
532
533 # get the directories in ascending order
534 if (!opendir (FROMDIR, $fromdir)) {
535 print STDERR "util::cachedir could not read directory $fromdir\n";
536 return;
537 }
538 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
539 closedir (FROMDIR);
540
541 if (!opendir (TODIR, $todir)) {
542 print STDERR "util::cacedir could not read directory $todir\n";
543 return;
544 }
545 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
546 closedir (TODIR);
547
548 my $fromi = 0;
549 my $toi = 0;
550
551 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
552# print "fromi: $fromi toi: $toi\n";
553
554 # see if we should delete a file/directory
555 # this should happen if the file/directory
556 # is not in the from list or if its a different
557 # size, or has an older timestamp
558 if ($toi < scalar(@todir)) {
559 if (($fromi >= scalar(@fromdir)) ||
560 ($todir[$toi] lt $fromdir[$fromi] ||
561 ($todir[$toi] eq $fromdir[$fromi] &&
562 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
563 $verbosity)))) {
564
565 # the files are different
566 &rm_r("$todir/$todir[$toi]");
567 splice(@todir, $toi, 1); # $toi stays the same
568
569 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
570 # the files are the same
571 # if it is a directory, check its contents
572 if (-d "$todir/$todir[$toi]") {
573 &cachedir ("$fromdir/$fromdir[$fromi]",
574 "$todir/$todir[$toi]", $verbosity);
575 }
576
577 $toi++;
578 $fromi++;
579 next;
580 }
581 }
582
583 # see if we should insert a file/directory
584 # we should insert a file/directory if there
585 # is no tofiles left or if the tofile does not exist
586 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
587 $todir[$toi] gt $fromdir[$fromi])) {
588 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
589 splice (@todir, $toi, 0, $fromdir[$fromi]);
590
591 $toi++;
592 $fromi++;
593 }
594 }
595}
596
597# this function returns -1 if either file is not found
598# assumes that $file1 and $file2 are absolute file names or
599# in the current directory
600# $file2 is allowed to be newer than $file1
601sub differentfiles {
602 my ($file1, $file2, $verbosity) = @_;
603 $verbosity = 1 unless defined $verbosity;
604
605 $file1 =~ s/\/+$//;
606 $file2 =~ s/\/+$//;
607
608 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
609 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
610
611 return -1 unless (-e $file1 && -e $file2);
612 if ($file1name ne $file2name) {
613 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
614 return 1;
615 }
616
617 my @file1stat = stat ($file1);
618 my @file2stat = stat ($file2);
619
620 if (-d $file1) {
621 if (! -d $file2) {
622 print STDERR "one file is a directory\n" if ($verbosity >= 2);
623 return 1;
624 }
625 return 0;
626 }
627
628 # both must be regular files
629 unless (-f $file1 && -f $file2) {
630 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
631 return 1;
632 }
633
634 # the size of the files must be the same
635 if ($file1stat[7] != $file2stat[7]) {
636 print STDERR "different sized files\n" if ($verbosity >= 2);
637 return 1;
638 }
639
640 # the second file cannot be older than the first
641 if ($file1stat[9] > $file2stat[9]) {
642 print STDERR "file is older\n" if ($verbosity >= 2);
643 return 1;
644 }
645
646 return 0;
647}
648
649
650sub get_tmp_filename
651{
652 my $file_ext = shift(@_) || undef;
653
654 my $opt_dot_file_ext = "";
655 if (defined $file_ext) {
656 if ($file_ext !~ m/\./) {
657 # no dot, so needs one added in at start
658 $opt_dot_file_ext = ".$file_ext"
659 }
660 else {
661 # allow for "extensions" such as _metadata.txt to be handled
662 # gracefully
663 $opt_dot_file_ext = $file_ext;
664 }
665 }
666
667 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
668 &mk_all_dir ($tmpdir) unless -e $tmpdir;
669
670 my $count = 1000;
671 my $rand = int(rand $count);
672 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
673
674 while (-e $full_tmp_filename) {
675 $rand = int(rand $count);
676 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
677 $count++;
678 }
679
680 return $full_tmp_filename;
681}
682
683sub get_timestamped_tmp_folder
684{
685
686 my $tmp_dirname;
687 if(defined $ENV{'GSDLCOLLECTDIR'}) {
688 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
689 } elsif(defined $ENV{'GSDLHOME'}) {
690 $tmp_dirname = $ENV{'GSDLHOME'};
691 } else {
692 return undef;
693 }
694
695 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
696 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
697
698 # add the timestamp into the path otherwise we can run into problems
699 # if documents have the same name
700 my $timestamp = time;
701 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
702 $tmp_dirname = $time_tmp_dirname;
703 my $i = 1;
704 while (-e $tmp_dirname) {
705 $tmp_dirname = "$time_tmp_dirname$i";
706 $i++;
707 }
708 &util::mk_dir($tmp_dirname);
709
710 return $tmp_dirname;
711}
712
713sub get_timestamped_tmp_filename_in_collection
714{
715
716 my ($input_filename, $output_ext) = @_;
717 # derive tmp filename from input filename
718 my ($tailname, $dirname, $suffix)
719 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
720
721 # softlink to collection tmp dir
722 my $tmp_dirname = &util::get_timestamped_tmp_folder();
723 $tmp_dirname = $dirname unless defined $tmp_dirname;
724
725 # following two steps copied from ConvertBinaryFile
726 # do we need them?? can't use them as is, as they use plugin methods.
727
728 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
729
730 # URLEncode this since htmls with images where the html filename is utf8 don't seem
731 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
732 # files on the filesystem.
733 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
734 if (defined $output_ext) {
735 $output_ext = ".$output_ext"; # add the dot
736 } else {
737 $output_ext = $suffix;
738 }
739 $output_ext= lc($output_ext);
740 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
741
742 return $tmp_filename;
743}
744
745sub get_toplevel_tmp_dir
746{
747 return filename_cat($ENV{'GSDLHOME'}, "tmp");
748}
749
750
751sub filename_to_regex {
752 my $filename = shift (@_);
753
754 # need to put single backslash back to double so that regex works
755 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
756 $filename =~ s/\\/\\\\/g;
757 }
758 return $filename;
759}
760
761sub filename_cat {
762 my $first_file = shift(@_);
763 my (@filenames) = @_;
764
765# Useful for debugging
766# -- might make sense to call caller(0) rather than (1)??
767# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
768# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
769
770 # If first_file is not null or empty, then add it back into the list
771 if (defined $first_file && $first_file =~ /\S/) {
772 unshift(@filenames, $first_file);
773 }
774
775 my $filename = join("/", @filenames);
776
777 # remove duplicate slashes and remove the last slash
778 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
779 $filename =~ s/[\\\/]+/\\/g;
780 } else {
781 $filename =~ s/[\/]+/\//g;
782 # DB: want a filename abc\de.html to remain like this
783 }
784 $filename =~ s/[\\\/]$//;
785
786 return $filename;
787}
788
789
790sub pathname_cat {
791 my $first_path = shift(@_);
792 my (@pathnames) = @_;
793
794 # If first_path is not null or empty, then add it back into the list
795 if (defined $first_path && $first_path =~ /\S/) {
796 unshift(@pathnames, $first_path);
797 }
798
799 my $join_char;
800 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
801 $join_char = ";";
802 } else {
803 $join_char = ":";
804 }
805
806 my $pathname = join($join_char, @pathnames);
807
808 # remove duplicate slashes
809 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
810 $pathname =~ s/[\\\/]+/\\/g;
811 } else {
812 $pathname =~ s/[\/]+/\//g;
813 # DB: want a pathname abc\de.html to remain like this
814 }
815
816 return $pathname;
817}
818
819
820sub tidy_up_oid {
821 my ($OID) = @_;
822 if ($OID =~ /\./) {
823 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
824 $OID =~ s/\.//g; #remove any periods
825 }
826 if ($OID =~ /^\s.*\s$/) {
827 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
828 # remove starting and trailing whitespace
829 $OID =~ s/^\s+//;
830 $OID =~ s/\s+$//;
831 }
832 if ($OID =~ /^[\d]*$/) {
833 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
834 $OID = "D" . $OID;
835 }
836
837 return $OID;
838}
839sub envvar_prepend {
840 my ($var,$val) = @_;
841
842 # do not prepend any value/path that's already in the environment variable
843 if ($ENV{'GSDLOS'} =~ /^windows$/i)
844 {
845 my $escaped_val = $val;
846 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
847 if (!defined($ENV{$var})) {
848 $ENV{$var} = "$val";
849 }
850 elsif($ENV{$var} !~ m/$escaped_val/) {
851 $ENV{$var} = "$val;".$ENV{$var};
852 }
853 }
854 else {
855 if (!defined($ENV{$var})) {
856 $ENV{$var} = "$val";
857 }
858 elsif($ENV{$var} !~ m/$val/) {
859 $ENV{$var} = "$val:".$ENV{$var};
860 }
861 }
862}
863
864sub envvar_append {
865 my ($var,$val) = @_;
866
867 # do not append any value/path that's already in the environment variable
868 if ($ENV{'GSDLOS'} =~ /^windows$/i)
869 {
870 my $escaped_val = $val;
871 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
872 if (!defined($ENV{$var})) {
873 $ENV{$var} = "$val";
874 }
875 elsif($ENV{$var} !~ m/$escaped_val/) {
876 $ENV{$var} .= ";$val";
877 }
878 }
879 else {
880 if (!defined($ENV{$var})) {
881 $ENV{$var} = "$val";
882 }
883 elsif($ENV{$var} !~ m/$val/) {
884 $ENV{$var} .= ":$val";
885 }
886 }
887}
888
889
890# splits a filename into a prefix and a tail extension using the tail_re, or
891# if that fails, splits on the file_extension . (dot)
892sub get_prefix_and_tail_by_regex {
893
894 my ($filename,$tail_re) = @_;
895
896 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
897 if ((!defined $file_prefix) || (!defined $file_ext)) {
898 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
899 }
900
901 return ($file_prefix,$file_ext);
902}
903
904# get full path and file only path from a base_dir (which may be empty) and
905# file (which may contain directories)
906sub get_full_filenames {
907 my ($base_dir, $file) = @_;
908
909 my $filename_full_path = $file;
910 # add on directory if present
911 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
912
913 my $filename_no_path = $file;
914
915 # remove directory if present
916 $filename_no_path =~ s/^.*[\/\\]//;
917 return ($filename_full_path, $filename_no_path);
918}
919
920# returns the path of a file without the filename -- ie. the directory the file is in
921sub filename_head {
922 my $filename = shift(@_);
923
924 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
925 $filename =~ s/[^\\\\]*$//;
926 }
927 else {
928 $filename =~ s/[^\\\/]*$//;
929 }
930
931 return $filename;
932}
933
934
935# returns 1 if filename1 and filename2 point to the same
936# file or directory
937sub filenames_equal {
938 my ($filename1, $filename2) = @_;
939
940 # use filename_cat to clean up trailing slashes and
941 # multiple slashes
942 $filename1 = filename_cat ($filename1);
943 $filename2 = filename_cat ($filename2);
944
945 # filenames not case sensitive on windows
946 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
947 $filename1 =~ tr/[A-Z]/[a-z]/;
948 $filename2 =~ tr/[A-Z]/[a-z]/;
949 }
950 return 1 if $filename1 eq $filename2;
951 return 0;
952}
953
954sub filename_within_collection
955{
956 my ($filename) = @_;
957
958 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
959
960 if (defined $collect_dir) {
961 my $dirsep = &util::get_dirsep();
962 if ($collect_dir !~ m/$dirsep$/) {
963 $collect_dir .= $dirsep;
964 }
965
966 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
967
968 # if from within GSDLCOLLECTDIR, then remove directory prefix
969 # so source_filename is realative to it. This is done to aid
970 # portability, i.e. the collection can be moved to somewhere
971 # else on the file system and the archives directory will still
972 # work. This is needed, for example in the applet version of
973 # GLI where GSDLHOME/collect on the server will be different to
974 # the collect directory of the remove user. Of course,
975 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
976 # it back into a full pathname.
977
978 if ($filename =~ /^$collect_dir(.*)$/) {
979 $filename = $1;
980 }
981 }
982
983 return $filename;
984}
985
986sub filename_is_absolute
987{
988 my ($filename) = @_;
989
990 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
991 return ($filename =~ m/^(\w:)?\\/);
992 }
993 else {
994 return ($filename =~ m/^\//);
995 }
996}
997
998
999## @method make_absolute()
1000#
1001# Ensure the given file path is absolute in respect to the given base path.
1002#
1003# @param $base_dir A string denoting the base path the given dir must be
1004# absolute to.
1005# @param $dir The directory to be made absolute as a string. Note that the
1006# dir may already be absolute, in which case it will remain
1007# unchanged.
1008# @return The now absolute form of the directory as a string.
1009#
1010# @author John Thompson, DL Consulting Ltd.
1011# @copy 2006 DL Consulting Ltd.
1012#
1013#used in buildcol.pl, doesn't work for all cases --kjdon
1014sub make_absolute {
1015
1016 my ($base_dir, $dir) = @_;
1017### print STDERR "dir = $dir\n";
1018 $dir =~ s/[\\\/]+/\//g;
1019 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1020 $dir =~ s|^/tmp_mnt||;
1021 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1022 $dir =~ s|/[.][.]?/|/|g;
1023 $dir =~ tr|/|/|s;
1024### print STDERR "dir = $dir\n";
1025
1026 return $dir;
1027}
1028## make_absolute() ##
1029
1030sub get_dirsep {
1031
1032 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1033 return "\\";
1034 } else {
1035 return "\/";
1036 }
1037}
1038
1039sub get_os_dirsep {
1040
1041 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1042 return "\\\\";
1043 } else {
1044 return "\\\/";
1045 }
1046}
1047
1048sub get_re_dirsep {
1049
1050 return "\\\\|\\\/";
1051}
1052
1053
1054sub get_dirsep_tail {
1055 my ($filename) = @_;
1056
1057 # returns last part of directory or filename
1058 # On unix e.g. a/b.d => b.d
1059 # a/b/c => c
1060
1061 my $dirsep = get_re_dirsep();
1062 my @dirs = split (/$dirsep/, $filename);
1063 my $tail = pop @dirs;
1064
1065 # - caused problems under windows
1066 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1067
1068 return $tail;
1069}
1070
1071
1072# if this is running on windows we want binaries to end in
1073# .exe, otherwise they don't have to end in any extension
1074sub get_os_exe {
1075 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1076 return "";
1077}
1078
1079
1080# test to see whether this is a big or little endian machine
1081sub is_little_endian
1082{
1083 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1084 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1085 # Otherwise, it's little endian
1086
1087 #return 0 if $^O =~ /^darwin$/i;
1088 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1089
1090 # Going back to stating exactly whether the machine is little endian
1091 # or big endian, without any special case for Macs. Since for rata it comes
1092 # back with little endian and for shuttle with bigendian.
1093 return (ord(substr(pack("s",1), 0, 1)) == 1);
1094}
1095
1096
1097# will return the collection name if successful, "" otherwise
1098sub use_collection {
1099 my ($collection, $collectdir) = @_;
1100
1101 if (!defined $collectdir || $collectdir eq "") {
1102 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1103 }
1104
1105 # get and check the collection
1106 if (!defined($collection) || $collection eq "") {
1107 if (defined $ENV{'GSDLCOLLECTION'}) {
1108 $collection = $ENV{'GSDLCOLLECTION'};
1109 } else {
1110 print STDOUT "No collection specified\n";
1111 return "";
1112 }
1113 }
1114
1115 if ($collection eq "modelcol") {
1116 print STDOUT "You can't use modelcol.\n";
1117 return "";
1118 }
1119
1120 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1121 # are defined
1122 $ENV{'GSDLCOLLECTION'} = $collection;
1123 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1124
1125 # make sure this collection exists
1126 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1127 print STDOUT "Invalid collection ($collection).\n";
1128 return "";
1129 }
1130
1131 # everything is ready to go
1132 return $collection;
1133}
1134
1135sub get_current_collection_name {
1136 return $ENV{'GSDLCOLLECTION'};
1137}
1138
1139
1140# will return the collection name if successful, "" otherwise.
1141# Like use_collection (above) but for greenstone 3 (taking account of site level)
1142
1143sub use_site_collection {
1144 my ($site, $collection, $collectdir) = @_;
1145
1146 if (!defined $collectdir || $collectdir eq "") {
1147 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1148 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1149 }
1150
1151 # collectdir explicitly set by this point (using $site variable if required).
1152 # Can call "old" gsdl2 use_collection now.
1153
1154 return use_collection($collection,$collectdir);
1155}
1156
1157
1158
1159sub locate_config_file
1160{
1161 my ($file) = @_;
1162
1163 my $locations = locate_config_files($file);
1164
1165 return shift @$locations; # returns undef if 'locations' is empty
1166}
1167
1168
1169sub locate_config_files
1170{
1171 my ($file) = @_;
1172
1173 my @locations = ();
1174
1175 if (-e $file) {
1176 # Clearly specified (most likely full filename)
1177 # No need to hunt in 'etc' directories, return value unchanged
1178 push(@locations,$file);
1179 }
1180 else {
1181 # Check for collection specific one before looking in global GSDL 'etc'
1182 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1183 my $test_collect_etc_filename
1184 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1185
1186 if (-e $test_collect_etc_filename) {
1187 push(@locations,$test_collect_etc_filename);
1188 }
1189 }
1190 my $test_main_etc_filename
1191 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1192 if (-e $test_main_etc_filename) {
1193 push(@locations,$test_main_etc_filename);
1194 }
1195 }
1196
1197 return \@locations;
1198}
1199
1200
1201sub hyperlink_text
1202{
1203 my ($text) = @_;
1204
1205 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1206 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1207
1208 return $text;
1209}
1210
1211
1212# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1213# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1214sub is_dir_empty
1215{
1216 my ($path) = @_;
1217 opendir DIR, $path;
1218 while(my $entry = readdir DIR) {
1219 next if($entry =~ /^\.\.?$/);
1220 closedir DIR;
1221 return 0;
1222 }
1223 closedir DIR;
1224 return 1;
1225}
1226
1227# Returns the given filename converted using either URL encoding or base64
1228# encoding, as specified by $rename_method. If the given filename has no suffix
1229# (if it is just the tailname), then $no_suffix should be some defined value.
1230# rename_method can be url, none, base64
1231sub rename_file {
1232 my ($filename, $rename_method, $no_suffix) = @_;
1233
1234 if(!$filename) { # undefined or empty string
1235 return $filename;
1236 }
1237
1238 if (!$rename_method) {
1239 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1240 # Debugging information
1241 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1242 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1243 $rename_method = "url";
1244 } elsif($rename_method eq "none") {
1245 return $filename; # would have already been renamed
1246 }
1247
1248 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1249 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1250 ###$filename =~ s/ /_/g;
1251
1252 my ($tailname,$dirname,$suffix);
1253 if($no_suffix) { # given a tailname, no suffix
1254 ($tailname,$dirname) = File::Basename::fileparse($filename);
1255 }
1256 else {
1257 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1258 }
1259 $suffix = "" if !$suffix;
1260
1261 if ($rename_method eq "url") {
1262 $tailname = &unicode::url_encode($tailname);
1263 }
1264 elsif ($rename_method eq "base64") {
1265 $tailname = &unicode::base64_encode($tailname);
1266 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1267 }
1268
1269 $filename = "$tailname$suffix";
1270 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1271
1272 return $filename;
1273}
1274
1275
1276# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1277sub rename_ldb_or_bdb_file {
1278 my ($filename_no_ext) = @_;
1279
1280 my $new_filename = "$filename_no_ext.gdb";
1281 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1282 # try ldb
1283 my $old_filename = "$filename_no_ext.ldb";
1284
1285 if (-f $old_filename) {
1286 print STDERR "Renaming $old_filename to $new_filename\n";
1287 rename ($old_filename, $new_filename)
1288 || print STDERR "Rename failed: $!\n";
1289 return;
1290 }
1291 # try bdb
1292 $old_filename = "$filename_no_ext.bdb";
1293 if (-f $old_filename) {
1294 print STDERR "Renaming $old_filename to $new_filename\n";
1295 rename ($old_filename, $new_filename)
1296 || print STDERR "Rename failed: $!\n";
1297 return;
1298 }
1299}
1300
1301
1302# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1303# By default, /greenstone3 for GS3 or /greenstone for GS2.
1304sub get_greenstone_url_prefix() {
1305 # if already set on a previous occasion, just return that
1306 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1307 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1308
1309 my ($configfile, $urlprefix, $defaultUrlprefix);
1310 my @propertynames = ();
1311
1312 if($ENV{'GSDL3SRCHOME'}) {
1313 $defaultUrlprefix = "/greenstone3";
1314 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1315 push(@propertynames, qw/path\s*\=/);
1316 } else {
1317 $defaultUrlprefix = "/greenstone";
1318 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1319 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1320 }
1321
1322 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1323
1324 if(!$urlprefix) { # no values found for URL prefix, use default values
1325 $urlprefix = $defaultUrlprefix;
1326 } else {
1327 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1328 $urlprefix =~ s/^\///; # remove the starting slash
1329 my @dirs = split(/(\\|\/)/, $urlprefix);
1330 $urlprefix = shift(@dirs);
1331
1332 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1333 $urlprefix = "/$urlprefix";
1334 }
1335 }
1336
1337 # set for the future
1338 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1339# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1340 return $urlprefix;
1341}
1342
1343
1344# Given a config file (xml or java properties file) and a list/array of regular expressions
1345# that represent property names to match on, this function will return the value for the 1st
1346# matching property name. If the return value is undefined, no matching property was found.
1347sub extract_propvalue_from_file() {
1348 my ($configfile, $propertynames) = @_;
1349
1350 my $value;
1351 unless(open(FIN, "<$configfile")) {
1352 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1353 return $value; # not initialised
1354 }
1355
1356 # Read the entire file at once, as one single line, then close it
1357 my $filecontents;
1358 {
1359 local $/ = undef;
1360 $filecontents = <FIN>;
1361 }
1362 close(FIN);
1363
1364 foreach my $regex (@$propertynames) {
1365 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1366 if($value) {
1367 $value =~ s/^\"//; # remove any startquotes
1368 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1369 last; # found value for a matching property, break from loop
1370 }
1371 }
1372
1373 return $value;
1374}
1375
1376# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1377# given that perllib is in @INC in order to invoke this subroutine.
1378# Call as follows -- after setting up INC to include perllib and
1379# after setting up GSDLHOME and GSDLOS:
1380#
1381# require util;
1382# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1383#
1384sub setup_greenstone_env() {
1385 my ($GSDLHOME, $GSDLOS) = @_;
1386
1387 #my %env_map = ();
1388 # Get the localised ENV settings of running a localised source setup.bash
1389 # and put it into the ENV here.
1390 my $perl_command = "(cd $GSDLHOME; . ./setup.bash > /dev/null; env)";
1391 if($GSDLOS =~ m/windows/i) {
1392 #$perl_command = "(cmd && cd $GSDLHOME && setup.bat > nul && set && exit)";
1393 $perl_command = "(cd $GSDLHOME && setup.bat > nul && set)";
1394 }
1395 if (!open(PIN, "$perl_command |")) {
1396 print STDERR ("Unable to execute command: $perl_command. $!\n");
1397 }
1398
1399 while (defined (my $perl_output_line = <PIN>)) {
1400 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1401 #$env_map{$key}=$value;
1402 $ENV{$key}=$value;
1403 }
1404
1405 # If any keys in $ENV don't occur in Greenstone's localised env
1406 # (stored in $env_map), delete those entries from $ENV
1407 #foreach $key (keys %ENV) {
1408 # if(!defined $env_map{$key}) {
1409 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1410 # delete $ENV{$key}; # del $ENV(key, value) pair
1411 # }
1412 #}
1413 #undef %env_map;
1414}
1415
14161;
Note: See TracBrowser for help on using the repository browser.