source: main/trunk/greenstone2/perllib/util.pm@ 23249

Last change on this file since 23249 was 23249, checked in by ak19, 13 years ago

A useful debug version of the rm method which got added in when Dr Bainbridge fixed the mimetype file deleting issue (commit 23248).

  • Property svn:keywords set to Author Date Id Revision
File size: 36.8 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use File::Copy;
29use File::Basename;
30
31use strict;
32
33
34# removes files (but not directories)
35sub rm {
36 my (@files) = @_;
37
38 my @filefiles = ();
39
40 # make sure the files we want to delete exist
41 # and are regular files
42 foreach my $file (@files) {
43 if (!-e $file) {
44 print STDERR "util::rm $file does not exist\n";
45 } elsif ((!-f $file) && (!-l $file)) {
46 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47 } else {
48 push (@filefiles, $file);
49 }
50 }
51
52 # remove the files
53 my $numremoved = unlink @filefiles;
54
55 # check to make sure all of them were removed
56 if ($numremoved != scalar(@filefiles)) {
57 print STDERR "util::rm Not all files were removed\n";
58 }
59}
60
61# removes files (but not directories) - can rename this to the default
62# "rm" subroutine when debugging the deletion of individual files.
63sub rm_debug {
64 my (@files) = @_;
65 my @filefiles = ();
66
67 # make sure the files we want to delete exist
68 # and are regular files
69 foreach my $file (@files) {
70 if (!-e $file) {
71 print STDERR "util::rm $file does not exist\n";
72 } elsif ((!-f $file) && (!-l $file)) {
73 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
74 } else { # debug message
75 unlink($file) or warn "Could not delete file $file: $!\n";
76 }
77 }
78}
79
80
81# recursive removal
82sub filtered_rm_r {
83 my ($files,$file_accept_re,$file_reject_re) = @_;
84
85 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
86
87 # recursively remove the files
88 foreach my $file (@files_array) {
89 $file =~ s/[\/\\]+$//; # remove trailing slashes
90
91 if (!-e $file) {
92 print STDERR "util::filtered_rm_r $file does not exist\n";
93
94 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
95 # get the contents of this directory
96 if (!opendir (INDIR, $file)) {
97 print STDERR "util::filtered_rm_r could not open directory $file\n";
98 } else {
99 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
100 closedir (INDIR);
101
102 # remove all the files in this directory
103 map {$_="$file/$_";} @filedir;
104 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
105
106 if (!defined $file_accept_re && !defined $file_reject_re) {
107 # remove this directory
108 if (!rmdir $file) {
109 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
110 }
111 }
112 }
113 } else {
114 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
115
116 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
117 # remove this file
118 &rm ($file);
119 }
120 }
121 }
122}
123
124
125# recursive removal
126sub rm_r {
127 my (@files) = @_;
128
129 # use the more general (but reterospectively written function
130 # filtered_rm_r function()
131
132 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
133}
134
135
136
137
138# moves a file or a group of files
139sub mv {
140 my $dest = pop (@_);
141 my (@srcfiles) = @_;
142
143 # remove trailing slashes from source and destination files
144 $dest =~ s/[\\\/]+$//;
145 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
146
147 # a few sanity checks
148 if (scalar (@srcfiles) == 0) {
149 print STDERR "util::mv no destination directory given\n";
150 return;
151 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
152 print STDERR "util::mv if multiple source files are given the ".
153 "destination must be a directory\n";
154 return;
155 }
156
157 # move the files
158 foreach my $file (@srcfiles) {
159 my $tempdest = $dest;
160 if (-d $tempdest) {
161 my ($filename) = $file =~ /([^\\\/]+)$/;
162 $tempdest .= "/$filename";
163 }
164 if (!-e $file) {
165 print STDERR "util::mv $file does not exist\n";
166 } else {
167 rename ($file, $tempdest);
168 }
169 }
170}
171
172
173# copies a file or a group of files
174sub cp {
175 my $dest = pop (@_);
176 my (@srcfiles) = @_;
177
178 # remove trailing slashes from source and destination files
179 $dest =~ s/[\\\/]+$//;
180 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
181
182 # a few sanity checks
183 if (scalar (@srcfiles) == 0) {
184 print STDERR "util::cp no destination directory given\n";
185 return;
186 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
187 print STDERR "util::cp if multiple source files are given the ".
188 "destination must be a directory\n";
189 return;
190 }
191
192 # copy the files
193 foreach my $file (@srcfiles) {
194 my $tempdest = $dest;
195 if (-d $tempdest) {
196 my ($filename) = $file =~ /([^\\\/]+)$/;
197 $tempdest .= "/$filename";
198 }
199 if (!-e $file) {
200 print STDERR "util::cp $file does not exist\n";
201 } elsif (!-f $file) {
202 print STDERR "util::cp $file is not a plain file\n";
203 } else {
204 &File::Copy::copy ($file, $tempdest);
205 }
206 }
207}
208
209
210
211# recursively copies a file or group of files
212# syntax: cp_r (sourcefiles, destination directory)
213# destination must be a directory - to copy one file to
214# another use cp instead
215sub cp_r {
216 my $dest = pop (@_);
217 my (@srcfiles) = @_;
218
219 # a few sanity checks
220 if (scalar (@srcfiles) == 0) {
221 print STDERR "util::cp_r no destination directory given\n";
222 return;
223 } elsif (-f $dest) {
224 print STDERR "util::cp_r destination must be a directory\n";
225 return;
226 }
227
228 # create destination directory if it doesn't exist already
229 if (! -d $dest) {
230 my $store_umask = umask(0002);
231 mkdir ($dest, 0777);
232 umask($store_umask);
233 }
234
235 # copy the files
236 foreach my $file (@srcfiles) {
237
238 if (!-e $file) {
239 print STDERR "util::cp_r $file does not exist\n";
240
241 } elsif (-d $file) {
242 # make the new directory
243 my ($filename) = $file =~ /([^\\\/]*)$/;
244 $dest = &util::filename_cat ($dest, $filename);
245 my $store_umask = umask(0002);
246 mkdir ($dest, 0777);
247 umask($store_umask);
248
249 # get the contents of this directory
250 if (!opendir (INDIR, $file)) {
251 print STDERR "util::cp_r could not open directory $file\n";
252 } else {
253 my @filedir = readdir (INDIR);
254 closedir (INDIR);
255 foreach my $f (@filedir) {
256 next if $f =~ /^\.\.?$/;
257 # copy all the files in this directory
258 my $ff = &util::filename_cat ($file, $f);
259 &cp_r ($ff, $dest);
260 }
261 }
262
263 } else {
264 &cp($file, $dest);
265 }
266 }
267}
268# recursively copies a file or group of files
269# syntax: cp_r (sourcefiles, destination directory)
270# destination must be a directory - to copy one file to
271# another use cp instead
272sub cp_r_nosvn {
273 my $dest = pop (@_);
274 my (@srcfiles) = @_;
275
276 # a few sanity checks
277 if (scalar (@srcfiles) == 0) {
278 print STDERR "util::cp_r no destination directory given\n";
279 return;
280 } elsif (-f $dest) {
281 print STDERR "util::cp_r destination must be a directory\n";
282 return;
283 }
284
285 # create destination directory if it doesn't exist already
286 if (! -d $dest) {
287 my $store_umask = umask(0002);
288 mkdir ($dest, 0777);
289 umask($store_umask);
290 }
291
292 # copy the files
293 foreach my $file (@srcfiles) {
294
295 if (!-e $file) {
296 print STDERR "util::cp_r $file does not exist\n";
297
298 } elsif (-d $file) {
299 # make the new directory
300 my ($filename) = $file =~ /([^\\\/]*)$/;
301 $dest = &util::filename_cat ($dest, $filename);
302 my $store_umask = umask(0002);
303 mkdir ($dest, 0777);
304 umask($store_umask);
305
306 # get the contents of this directory
307 if (!opendir (INDIR, $file)) {
308 print STDERR "util::cp_r could not open directory $file\n";
309 } else {
310 my @filedir = readdir (INDIR);
311 closedir (INDIR);
312 foreach my $f (@filedir) {
313 next if $f =~ /^\.\.?$/;
314 next if $f =~ /^\.svn$/;
315 # copy all the files in this directory
316 my $ff = &util::filename_cat ($file, $f);
317 &cp_r ($ff, $dest);
318 }
319 }
320
321 } else {
322 &cp($file, $dest);
323 }
324 }
325}
326
327# copies a directory and its contents, excluding subdirectories, into a new directory
328sub cp_r_toplevel {
329 my $dest = pop (@_);
330 my (@srcfiles) = @_;
331
332 # a few sanity checks
333 if (scalar (@srcfiles) == 0) {
334 print STDERR "util::cp_r no destination directory given\n";
335 return;
336 } elsif (-f $dest) {
337 print STDERR "util::cp_r destination must be a directory\n";
338 return;
339 }
340
341 # create destination directory if it doesn't exist already
342 if (! -d $dest) {
343 my $store_umask = umask(0002);
344 mkdir ($dest, 0777);
345 umask($store_umask);
346 }
347
348 # copy the files
349 foreach my $file (@srcfiles) {
350
351 if (!-e $file) {
352 print STDERR "util::cp_r $file does not exist\n";
353
354 } elsif (-d $file) {
355 # make the new directory
356 my ($filename) = $file =~ /([^\\\/]*)$/;
357 $dest = &util::filename_cat ($dest, $filename);
358 my $store_umask = umask(0002);
359 mkdir ($dest, 0777);
360 umask($store_umask);
361
362 # get the contents of this directory
363 if (!opendir (INDIR, $file)) {
364 print STDERR "util::cp_r could not open directory $file\n";
365 } else {
366 my @filedir = readdir (INDIR);
367 closedir (INDIR);
368 foreach my $f (@filedir) {
369 next if $f =~ /^\.\.?$/;
370
371 # copy all the files in this directory, but not directories
372 my $ff = &util::filename_cat ($file, $f);
373 if (-f $ff) {
374 &cp($ff, $dest);
375 #&cp_r ($ff, $dest);
376 }
377 }
378 }
379
380 } else {
381 &cp($file, $dest);
382 }
383 }
384}
385
386sub mk_dir {
387 my ($dir) = @_;
388
389 my $store_umask = umask(0002);
390 my $mkdir_ok = mkdir ($dir, 0777);
391 umask($store_umask);
392
393 if (!$mkdir_ok)
394 {
395 print STDERR "util::mk_dir could not create directory $dir\n";
396 return;
397 }
398}
399
400# in case anyone cares - I did some testing (using perls Benchmark module)
401# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
402# slightly faster (surprisingly) - Stefan.
403sub mk_all_dir {
404 my ($dir) = @_;
405
406 # use / for the directory separator, remove duplicate and
407 # trailing slashes
408 $dir=~s/[\\\/]+/\//g;
409 $dir=~s/[\\\/]+$//;
410
411 # make sure the cache directory exists
412 my $dirsofar = "";
413 my $first = 1;
414 foreach my $dirname (split ("/", $dir)) {
415 $dirsofar .= "/" unless $first;
416 $first = 0;
417
418 $dirsofar .= $dirname;
419
420 next if $dirname =~ /^(|[a-z]:)$/i;
421 if (!-e $dirsofar)
422 {
423 my $store_umask = umask(0002);
424 my $mkdir_ok = mkdir ($dirsofar, 0777);
425 umask($store_umask);
426 if (!$mkdir_ok)
427 {
428 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
429 return;
430 }
431 }
432 }
433}
434
435# make hard link to file if supported by OS, otherwise copy the file
436sub hard_link {
437 my ($src, $dest, $verbosity) = @_;
438
439 # remove trailing slashes from source and destination files
440 $src =~ s/[\\\/]+$//;
441 $dest =~ s/[\\\/]+$//;
442
443 # a few sanity checks
444 if (-e $dest) {
445 # destination file already exists
446 return;
447 }
448 elsif (!-e $src) {
449 print STDERR "util::hard_link source file $src does not exist\n";
450 return 1;
451 }
452 elsif (-d $src) {
453 print STDERR "util::hard_link source $src is a directory\n";
454 return 1;
455 }
456
457 my $dest_dir = &File::Basename::dirname($dest);
458 mk_all_dir($dest_dir) if (!-e $dest_dir);
459
460
461 if (!link($src, $dest)) {
462 if ((!defined $verbosity) || ($verbosity>2)) {
463 print STDERR "util::hard_link: unable to create hard link. ";
464 print STDERR " Copying file: $src -> $dest\n";
465 }
466 &File::Copy::copy ($src, $dest);
467 }
468 return 0;
469}
470
471# make soft link to file if supported by OS, otherwise copy file
472sub soft_link {
473 my ($src, $dest, $ensure_paths_absolute) = @_;
474
475 # remove trailing slashes from source and destination files
476 $src =~ s/[\\\/]+$//;
477 $dest =~ s/[\\\/]+$//;
478
479 # Ensure file paths are absolute IF requested to do so
480 # Soft_linking didn't work for relative paths
481 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
482 # We need to ensure that the src file is the absolute path
483 # See http://perldoc.perl.org/File/Spec.html
484 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
485 $src = File::Spec->rel2abs($src); # make absolute
486 }
487 # Might as well ensure that the destination file's absolute path is used
488 if(!File::Spec->file_name_is_absolute( $dest )) {
489 $dest = File::Spec->rel2abs($dest); # make absolute
490 }
491 }
492
493 # a few sanity checks
494 if (!-e $src) {
495 print STDERR "util::soft_link source file $src does not exist\n";
496 return 0;
497 }
498
499 my $dest_dir = &File::Basename::dirname($dest);
500 mk_all_dir($dest_dir) if (!-e $dest_dir);
501
502 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
503 # symlink not supported on windows
504 &File::Copy::copy ($src, $dest);
505
506 } elsif (!eval {symlink($src, $dest)}) {
507 print STDERR "util::soft_link: unable to create soft link.\n";
508 return 0;
509 }
510
511 return 1;
512}
513
514
515
516
517# updates a copy of a directory in some other part of the filesystem
518# verbosity settings are: 0=low, 1=normal, 2=high
519# both $fromdir and $todir should be absolute paths
520sub cachedir {
521 my ($fromdir, $todir, $verbosity) = @_;
522 $verbosity = 1 unless defined $verbosity;
523
524 # use / for the directory separator, remove duplicate and
525 # trailing slashes
526 $fromdir=~s/[\\\/]+/\//g;
527 $fromdir=~s/[\\\/]+$//;
528 $todir=~s/[\\\/]+/\//g;
529 $todir=~s/[\\\/]+$//;
530
531 &mk_all_dir ($todir);
532
533 # get the directories in ascending order
534 if (!opendir (FROMDIR, $fromdir)) {
535 print STDERR "util::cachedir could not read directory $fromdir\n";
536 return;
537 }
538 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
539 closedir (FROMDIR);
540
541 if (!opendir (TODIR, $todir)) {
542 print STDERR "util::cacedir could not read directory $todir\n";
543 return;
544 }
545 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
546 closedir (TODIR);
547
548 my $fromi = 0;
549 my $toi = 0;
550
551 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
552# print "fromi: $fromi toi: $toi\n";
553
554 # see if we should delete a file/directory
555 # this should happen if the file/directory
556 # is not in the from list or if its a different
557 # size, or has an older timestamp
558 if ($toi < scalar(@todir)) {
559 if (($fromi >= scalar(@fromdir)) ||
560 ($todir[$toi] lt $fromdir[$fromi] ||
561 ($todir[$toi] eq $fromdir[$fromi] &&
562 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
563 $verbosity)))) {
564
565 # the files are different
566 &rm_r("$todir/$todir[$toi]");
567 splice(@todir, $toi, 1); # $toi stays the same
568
569 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
570 # the files are the same
571 # if it is a directory, check its contents
572 if (-d "$todir/$todir[$toi]") {
573 &cachedir ("$fromdir/$fromdir[$fromi]",
574 "$todir/$todir[$toi]", $verbosity);
575 }
576
577 $toi++;
578 $fromi++;
579 next;
580 }
581 }
582
583 # see if we should insert a file/directory
584 # we should insert a file/directory if there
585 # is no tofiles left or if the tofile does not exist
586 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
587 $todir[$toi] gt $fromdir[$fromi])) {
588 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
589 splice (@todir, $toi, 0, $fromdir[$fromi]);
590
591 $toi++;
592 $fromi++;
593 }
594 }
595}
596
597# this function returns -1 if either file is not found
598# assumes that $file1 and $file2 are absolute file names or
599# in the current directory
600# $file2 is allowed to be newer than $file1
601sub differentfiles {
602 my ($file1, $file2, $verbosity) = @_;
603 $verbosity = 1 unless defined $verbosity;
604
605 $file1 =~ s/\/+$//;
606 $file2 =~ s/\/+$//;
607
608 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
609 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
610
611 return -1 unless (-e $file1 && -e $file2);
612 if ($file1name ne $file2name) {
613 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
614 return 1;
615 }
616
617 my @file1stat = stat ($file1);
618 my @file2stat = stat ($file2);
619
620 if (-d $file1) {
621 if (! -d $file2) {
622 print STDERR "one file is a directory\n" if ($verbosity >= 2);
623 return 1;
624 }
625 return 0;
626 }
627
628 # both must be regular files
629 unless (-f $file1 && -f $file2) {
630 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
631 return 1;
632 }
633
634 # the size of the files must be the same
635 if ($file1stat[7] != $file2stat[7]) {
636 print STDERR "different sized files\n" if ($verbosity >= 2);
637 return 1;
638 }
639
640 # the second file cannot be older than the first
641 if ($file1stat[9] > $file2stat[9]) {
642 print STDERR "file is older\n" if ($verbosity >= 2);
643 return 1;
644 }
645
646 return 0;
647}
648
649
650sub get_tmp_filename
651{
652 my $file_ext = shift(@_) || undef;
653
654 my $opt_dot_file_ext = "";
655 if (defined $file_ext) {
656 if ($file_ext !~ m/\./) {
657 # no dot, so needs one added in at start
658 $opt_dot_file_ext = ".$file_ext"
659 }
660 else {
661 # allow for "extensions" such as _metadata.txt to be handled
662 # gracefully
663 $opt_dot_file_ext = $file_ext;
664 }
665 }
666
667 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
668 &mk_all_dir ($tmpdir) unless -e $tmpdir;
669
670 my $count = 1000;
671 my $rand = int(rand $count);
672 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
673
674 while (-e $full_tmp_filename) {
675 $rand = int(rand $count);
676 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
677 $count++;
678 }
679
680 return $full_tmp_filename;
681}
682
683sub get_timestamped_tmp_folder
684{
685
686 my $tmp_dirname;
687 if(defined $ENV{'GSDLCOLLECTDIR'}) {
688 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
689 } elsif(defined $ENV{'GSDLHOME'}) {
690 $tmp_dirname = $ENV{'GSDLHOME'};
691 } else {
692 return undef;
693 }
694
695 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
696 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
697
698 # add the timestamp into the path otherwise we can run into problems
699 # if documents have the same name
700 my $timestamp = time;
701 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
702 $tmp_dirname = $time_tmp_dirname;
703 my $i = 1;
704 while (-e $tmp_dirname) {
705 $tmp_dirname = "$time_tmp_dirname$i";
706 $i++;
707 }
708 &util::mk_dir($tmp_dirname);
709
710 return $tmp_dirname;
711}
712
713sub get_timestamped_tmp_filename_in_collection
714{
715
716 my ($input_filename, $output_ext) = @_;
717 # derive tmp filename from input filename
718 my ($tailname, $dirname, $suffix)
719 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
720
721 # softlink to collection tmp dir
722 my $tmp_dirname = &util::get_timestamped_tmp_folder();
723 $tmp_dirname = $dirname unless defined $tmp_dirname;
724
725 # following two steps copied from ConvertBinaryFile
726 # do we need them?? can't use them as is, as they use plugin methods.
727
728 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
729
730 # URLEncode this since htmls with images where the html filename is utf8 don't seem
731 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
732 # files on the filesystem.
733 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
734 if (defined $output_ext) {
735 $output_ext = ".$output_ext"; # add the dot
736 } else {
737 $output_ext = $suffix;
738 }
739 $output_ext= lc($output_ext);
740 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
741
742 return $tmp_filename;
743}
744
745sub get_toplevel_tmp_dir
746{
747 return filename_cat($ENV{'GSDLHOME'}, "tmp");
748}
749
750
751sub filename_to_regex {
752 my $filename = shift (@_);
753
754 # need to put single backslash back to double so that regex works
755 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
756 $filename =~ s/\\/\\\\/g;
757 }
758 return $filename;
759}
760
761sub filename_cat {
762 my $first_file = shift(@_);
763 my (@filenames) = @_;
764
765# Useful for debugging
766# -- might make sense to call caller(0) rather than (1)??
767# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
768# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
769
770 # If first_file is not null or empty, then add it back into the list
771 if (defined $first_file && $first_file =~ /\S/) {
772 unshift(@filenames, $first_file);
773 }
774
775 my $filename = join("/", @filenames);
776
777 # remove duplicate slashes and remove the last slash
778 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
779 $filename =~ s/[\\\/]+/\\/g;
780 } else {
781 $filename =~ s/[\/]+/\//g;
782 # DB: want a filename abc\de.html to remain like this
783 }
784 $filename =~ s/[\\\/]$//;
785
786 return $filename;
787}
788
789
790sub pathname_cat {
791 my $first_path = shift(@_);
792 my (@pathnames) = @_;
793
794 # If first_path is not null or empty, then add it back into the list
795 if (defined $first_path && $first_path =~ /\S/) {
796 unshift(@pathnames, $first_path);
797 }
798
799 my $join_char;
800 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
801 $join_char = ";";
802 } else {
803 $join_char = ":";
804 }
805
806 my $pathname = join($join_char, @pathnames);
807
808 # remove duplicate slashes
809 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
810 $pathname =~ s/[\\\/]+/\\/g;
811 } else {
812 $pathname =~ s/[\/]+/\//g;
813 # DB: want a pathname abc\de.html to remain like this
814 }
815
816 return $pathname;
817}
818
819
820sub tidy_up_oid {
821 my ($OID) = @_;
822 if ($OID =~ /\./) {
823 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
824 $OID =~ s/\.//g; #remove any periods
825 }
826 if ($OID =~ /^\s.*\s$/) {
827 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
828 # remove starting and trailing whitespace
829 $OID =~ s/^\s+//;
830 $OID =~ s/\s+$//;
831 }
832 if ($OID =~ /^[\d]*$/) {
833 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
834 $OID = "D" . $OID;
835 }
836
837 return $OID;
838}
839sub envvar_prepend {
840 my ($var,$val) = @_;
841
842 # do not prepend any value/path that's already in the environment variable
843 if ($ENV{'GSDLOS'} =~ /^windows$/i)
844 {
845 my $escaped_val = $val;
846 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
847 if (!defined($ENV{$var})) {
848 $ENV{$var} = "$val";
849 }
850 elsif($ENV{$var} !~ m/$escaped_val/) {
851 $ENV{$var} = "$val;".$ENV{$var};
852 }
853 }
854 else {
855 if (!defined($ENV{$var})) {
856 $ENV{$var} = "$val";
857 }
858 elsif($ENV{$var} !~ m/$val/) {
859 $ENV{$var} = "$val:".$ENV{$var};
860 }
861 }
862}
863
864sub envvar_append {
865 my ($var,$val) = @_;
866
867 # do not append any value/path that's already in the environment variable
868 if ($ENV{'GSDLOS'} =~ /^windows$/i)
869 {
870 my $escaped_val = $val;
871 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
872 if (!defined($ENV{$var})) {
873 $ENV{$var} = "$val";
874 }
875 elsif($ENV{$var} !~ m/$escaped_val/) {
876 $ENV{$var} .= ";$val";
877 }
878 }
879 else {
880 if (!defined($ENV{$var})) {
881 $ENV{$var} = "$val";
882 }
883 elsif($ENV{$var} !~ m/$val/) {
884 $ENV{$var} .= ":$val";
885 }
886 }
887}
888
889
890# splits a filename into a prefix and a tail extension using the tail_re, or
891# if that fails, splits on the file_extension . (dot)
892sub get_prefix_and_tail_by_regex {
893
894 my ($filename,$tail_re) = @_;
895
896 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
897 if ((!defined $file_prefix) || (!defined $file_ext)) {
898 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
899 }
900
901 return ($file_prefix,$file_ext);
902}
903
904# get full path and file only path from a base_dir (which may be empty) and
905# file (which may contain directories)
906sub get_full_filenames {
907 my ($base_dir, $file) = @_;
908
909 my $filename_full_path = $file;
910 # add on directory if present
911 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
912
913 my $filename_no_path = $file;
914
915 # remove directory if present
916 $filename_no_path =~ s/^.*[\/\\]//;
917 return ($filename_full_path, $filename_no_path);
918}
919
920# returns the path of a file without the filename -- ie. the directory the file is in
921sub filename_head {
922 my $filename = shift(@_);
923
924 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
925 $filename =~ s/[^\\\\]*$//;
926 }
927 else {
928 $filename =~ s/[^\\\/]*$//;
929 }
930
931 return $filename;
932}
933
934
935# returns 1 if filename1 and filename2 point to the same
936# file or directory
937sub filenames_equal {
938 my ($filename1, $filename2) = @_;
939
940 # use filename_cat to clean up trailing slashes and
941 # multiple slashes
942 $filename1 = filename_cat ($filename1);
943 $filename2 = filename_cat ($filename2);
944
945 # filenames not case sensitive on windows
946 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
947 $filename1 =~ tr/[A-Z]/[a-z]/;
948 $filename2 =~ tr/[A-Z]/[a-z]/;
949 }
950 return 1 if $filename1 eq $filename2;
951 return 0;
952}
953
954sub filename_within_collection
955{
956 my ($filename) = @_;
957
958 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
959
960 if (defined $collect_dir) {
961 my $dirsep = &util::get_dirsep();
962 if ($collect_dir !~ m/$dirsep$/) {
963 $collect_dir .= $dirsep;
964 }
965
966 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
967
968 # if from within GSDLCOLLECTDIR, then remove directory prefix
969 # so source_filename is realative to it. This is done to aid
970 # portability, i.e. the collection can be moved to somewhere
971 # else on the file system and the archives directory will still
972 # work. This is needed, for example in the applet version of
973 # GLI where GSDLHOME/collect on the server will be different to
974 # the collect directory of the remove user. Of course,
975 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
976 # it back into a full pathname.
977
978 if ($filename =~ /^$collect_dir(.*)$/) {
979 $filename = $1;
980 }
981 }
982
983 return $filename;
984}
985
986sub filename_is_absolute
987{
988 my ($filename) = @_;
989
990 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
991 return ($filename =~ m/^(\w:)?\\/);
992 }
993 else {
994 return ($filename =~ m/^\//);
995 }
996}
997
998
999## @method make_absolute()
1000#
1001# Ensure the given file path is absolute in respect to the given base path.
1002#
1003# @param $base_dir A string denoting the base path the given dir must be
1004# absolute to.
1005# @param $dir The directory to be made absolute as a string. Note that the
1006# dir may already be absolute, in which case it will remain
1007# unchanged.
1008# @return The now absolute form of the directory as a string.
1009#
1010# @author John Thompson, DL Consulting Ltd.
1011# @copy 2006 DL Consulting Ltd.
1012#
1013#used in buildcol.pl, doesn't work for all cases --kjdon
1014sub make_absolute {
1015
1016 my ($base_dir, $dir) = @_;
1017### print STDERR "dir = $dir\n";
1018 $dir =~ s/[\\\/]+/\//g;
1019 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1020 $dir =~ s|^/tmp_mnt||;
1021 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1022 $dir =~ s|/[.][.]?/|/|g;
1023 $dir =~ tr|/|/|s;
1024### print STDERR "dir = $dir\n";
1025
1026 return $dir;
1027}
1028## make_absolute() ##
1029
1030sub get_dirsep {
1031
1032 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1033 return "\\";
1034 } else {
1035 return "\/";
1036 }
1037}
1038
1039sub get_os_dirsep {
1040
1041 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1042 return "\\\\";
1043 } else {
1044 return "\\\/";
1045 }
1046}
1047
1048sub get_re_dirsep {
1049
1050 return "\\\\|\\\/";
1051}
1052
1053
1054sub get_dirsep_tail {
1055 my ($filename) = @_;
1056
1057 # returns last part of directory or filename
1058 # On unix e.g. a/b.d => b.d
1059 # a/b/c => c
1060
1061 my $dirsep = get_re_dirsep();
1062 my @dirs = split (/$dirsep/, $filename);
1063 my $tail = pop @dirs;
1064
1065 # - caused problems under windows
1066 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1067
1068 return $tail;
1069}
1070
1071
1072# if this is running on windows we want binaries to end in
1073# .exe, otherwise they don't have to end in any extension
1074sub get_os_exe {
1075 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1076 return "";
1077}
1078
1079
1080# test to see whether this is a big or little endian machine
1081sub is_little_endian
1082{
1083 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1084 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1085 # Otherwise, it's little endian
1086
1087 #return 0 if $^O =~ /^darwin$/i;
1088 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1089
1090 # Going back to stating exactly whether the machine is little endian
1091 # or big endian, without any special case for Macs. Since for rata it comes
1092 # back with little endian and for shuttle with bigendian.
1093 return (ord(substr(pack("s",1), 0, 1)) == 1);
1094}
1095
1096
1097# will return the collection name if successful, "" otherwise
1098sub use_collection {
1099 my ($collection, $collectdir) = @_;
1100
1101 if (!defined $collectdir || $collectdir eq "") {
1102 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1103 }
1104
1105 # get and check the collection
1106 if (!defined($collection) || $collection eq "") {
1107 if (defined $ENV{'GSDLCOLLECTION'}) {
1108 $collection = $ENV{'GSDLCOLLECTION'};
1109 } else {
1110 print STDOUT "No collection specified\n";
1111 return "";
1112 }
1113 }
1114
1115 if ($collection eq "modelcol") {
1116 print STDOUT "You can't use modelcol.\n";
1117 return "";
1118 }
1119
1120 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1121 # are defined
1122 $ENV{'GSDLCOLLECTION'} = $collection;
1123 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1124
1125 # make sure this collection exists
1126 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1127 print STDOUT "Invalid collection ($collection).\n";
1128 return "";
1129 }
1130
1131 # everything is ready to go
1132 return $collection;
1133}
1134
1135sub get_current_collection_name {
1136 return $ENV{'GSDLCOLLECTION'};
1137}
1138
1139
1140# will return the collection name if successful, "" otherwise.
1141# Like use_collection (above) but for greenstone 3 (taking account of site level)
1142
1143sub use_site_collection {
1144 my ($site, $collection, $collectdir) = @_;
1145
1146 if (!defined $collectdir || $collectdir eq "") {
1147 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1148 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1149 }
1150
1151 # collectdir explicitly set by this point (using $site variable if required).
1152 # Can call "old" gsdl2 use_collection now.
1153
1154 return use_collection($collection,$collectdir);
1155}
1156
1157
1158
1159sub locate_config_file
1160{
1161 my ($file) = @_;
1162
1163 my $locations = locate_config_files($file);
1164
1165 return shift @$locations; # returns undef if 'locations' is empty
1166}
1167
1168
1169sub locate_config_files
1170{
1171 my ($file) = @_;
1172
1173 my @locations = ();
1174
1175 if (-e $file) {
1176 # Clearly specified (most likely full filename)
1177 # No need to hunt in 'etc' directories, return value unchanged
1178 push(@locations,$file);
1179 }
1180 else {
1181 # Check for collection specific one before looking in global GSDL 'etc'
1182 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1183 my $test_collect_etc_filename
1184 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1185
1186 if (-e $test_collect_etc_filename) {
1187 push(@locations,$test_collect_etc_filename);
1188 }
1189 }
1190 my $test_main_etc_filename
1191 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1192 if (-e $test_main_etc_filename) {
1193 push(@locations,$test_main_etc_filename);
1194 }
1195 }
1196
1197 return \@locations;
1198}
1199
1200
1201sub hyperlink_text
1202{
1203 my ($text) = @_;
1204
1205 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1206 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1207
1208 return $text;
1209}
1210
1211
1212# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1213# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1214sub is_dir_empty
1215{
1216 my ($path) = @_;
1217 opendir DIR, $path;
1218 while(my $entry = readdir DIR) {
1219 next if($entry =~ /^\.\.?$/);
1220 closedir DIR;
1221 return 0;
1222 }
1223 closedir DIR;
1224 return 1;
1225}
1226
1227# Returns the given filename converted using either URL encoding or base64
1228# encoding, as specified by $rename_method. If the given filename has no suffix
1229# (if it is just the tailname), then $no_suffix should be some defined value.
1230# rename_method can be url, none, base64
1231sub rename_file {
1232 my ($filename, $rename_method, $no_suffix) = @_;
1233
1234 if(!$filename) { # undefined or empty string
1235 return $filename;
1236 }
1237
1238 if (!$rename_method) {
1239 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1240 # Debugging information
1241 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1242 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1243 $rename_method = "url";
1244 } elsif($rename_method eq "none") {
1245 return $filename; # would have already been renamed
1246 }
1247
1248 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1249 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1250 ###$filename =~ s/ /_/g;
1251
1252 my ($tailname,$dirname,$suffix);
1253 if($no_suffix) { # given a tailname, no suffix
1254 ($tailname,$dirname) = File::Basename::fileparse($filename);
1255 }
1256 else {
1257 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1258 }
1259 $suffix = "" if !$suffix;
1260
1261 if ($rename_method eq "url") {
1262 $tailname = &unicode::url_encode($tailname);
1263 }
1264 elsif ($rename_method eq "base64") {
1265 $tailname = &unicode::base64_encode($tailname);
1266 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1267 }
1268
1269 $filename = "$tailname$suffix";
1270 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1271
1272 return $filename;
1273}
1274
1275
1276# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1277sub rename_ldb_or_bdb_file {
1278 my ($filename_no_ext) = @_;
1279
1280 my $new_filename = "$filename_no_ext.gdb";
1281 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1282 # try ldb
1283 my $old_filename = "$filename_no_ext.ldb";
1284
1285 if (-f $old_filename) {
1286 print STDERR "Renaming $old_filename to $new_filename\n";
1287 rename ($old_filename, $new_filename)
1288 || print STDERR "Rename failed: $!\n";
1289 return;
1290 }
1291 # try bdb
1292 $old_filename = "$filename_no_ext.bdb";
1293 if (-f $old_filename) {
1294 print STDERR "Renaming $old_filename to $new_filename\n";
1295 rename ($old_filename, $new_filename)
1296 || print STDERR "Rename failed: $!\n";
1297 return;
1298 }
1299}
1300
1301
1302# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1303# By default, /greenstone3 for GS3 or /greenstone for GS2.
1304sub get_greenstone_url_prefix() {
1305 # if already set on a previous occasion, just return that
1306 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1307 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1308
1309 my ($configfile, $urlprefix, $defaultUrlprefix);
1310 my @propertynames = ();
1311
1312 if($ENV{'GSDL3SRCHOME'}) {
1313 $defaultUrlprefix = "/greenstone3";
1314 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1315 push(@propertynames, qw/path\s*\=/);
1316 } else {
1317 $defaultUrlprefix = "/greenstone";
1318 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1319 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1320 }
1321
1322 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1323
1324 if(!$urlprefix) { # no values found for URL prefix, use default values
1325 $urlprefix = $defaultUrlprefix;
1326 } else {
1327 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1328 $urlprefix =~ s/^\///; # remove the starting slash
1329 my @dirs = split(/(\\|\/)/, $urlprefix);
1330 $urlprefix = shift(@dirs);
1331
1332 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1333 $urlprefix = "/$urlprefix";
1334 }
1335 }
1336
1337 # set for the future
1338 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1339# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1340 return $urlprefix;
1341}
1342
1343
1344# Given a config file (xml or java properties file) and a list/array of regular expressions
1345# that represent property names to match on, this function will return the value for the 1st
1346# matching property name. If the return value is undefined, no matching property was found.
1347sub extract_propvalue_from_file() {
1348 my ($configfile, $propertynames) = @_;
1349
1350 my $value;
1351 unless(open(FIN, "<$configfile")) {
1352 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1353 return $value; # not initialised
1354 }
1355
1356 # Read the entire file at once, as one single line, then close it
1357 my $filecontents;
1358 {
1359 local $/ = undef;
1360 $filecontents = <FIN>;
1361 }
1362 close(FIN);
1363
1364 foreach my $regex (@$propertynames) {
1365 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1366 if($value) {
1367 $value =~ s/^\"//; # remove any startquotes
1368 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1369 last; # found value for a matching property, break from loop
1370 }
1371 }
1372
1373 return $value;
1374}
1375
1376
13771;
Note: See TracBrowser for help on using the repository browser.