source: gsdl/trunk/perllib/util.pm@ 18337

Last change on this file since 18337 was 18337, checked in by ak19, 15 years ago
  1. Spaces replaced with underscore in the subroutine rename_file instead of in unicode::url_encode. 2. Now the subroutine takes an optional third parameter which is set when given only a tailname: no suffix to the given filename. This is useful for when the filename contains period marks in the middle, none of which are the one separating tailname from the suffix. The testcase with the 'ridiculous' filename (containing period marks and spaces) is successfully handled again.
  • Property svn:keywords set to Author Date Id Revision
File size: 27.3 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use File::Copy;
29use File::Basename;
30use MIME::Base64; # for base64 encoding
31
32use strict;
33
34
35# removes files (but not directories)
36sub rm {
37 my (@files) = @_;
38 my @filefiles = ();
39
40 # make sure the files we want to delete exist
41 # and are regular files
42 foreach my $file (@files) {
43 if (!-e $file) {
44 print STDERR "util::rm $file does not exist\n";
45 } elsif ((!-f $file) && (!-l $file)) {
46 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47 } else {
48 push (@filefiles, $file);
49 }
50 }
51
52 # remove the files
53 my $numremoved = unlink @filefiles;
54
55 # check to make sure all of them were removed
56 if ($numremoved != scalar(@filefiles)) {
57 print STDERR "util::rm Not all files were removed\n";
58 }
59}
60
61
62
63# recursive removal
64sub filtered_rm_r {
65 my ($files,$file_accept_re,$file_reject_re) = @_;
66
67 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
68
69 # recursively remove the files
70 foreach my $file (@files_array) {
71 $file =~ s/[\/\\]+$//; # remove trailing slashes
72
73 if (!-e $file) {
74 print STDERR "util::filtered_rm_r $file does not exist\n";
75
76 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
77 # get the contents of this directory
78 if (!opendir (INDIR, $file)) {
79 print STDERR "util::filtered_rm_r could not open directory $file\n";
80 } else {
81 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
82 closedir (INDIR);
83
84 # remove all the files in this directory
85 map {$_="$file/$_";} @filedir;
86 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
87
88 if (!defined $file_accept_re && !defined $file_reject_re) {
89 # remove this directory
90 if (!rmdir $file) {
91 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
92 }
93 }
94 }
95 } else {
96 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
97
98 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
99 # remove this file
100 &rm ($file);
101 }
102 }
103 }
104}
105
106
107# recursive removal
108sub rm_r {
109 my (@files) = @_;
110
111 # use the more general (but reterospectively written function
112 # filtered_rm_r function()
113
114 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
115}
116
117
118
119
120# moves a file or a group of files
121sub mv {
122 my $dest = pop (@_);
123 my (@srcfiles) = @_;
124
125 # remove trailing slashes from source and destination files
126 $dest =~ s/[\\\/]+$//;
127 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
128
129 # a few sanity checks
130 if (scalar (@srcfiles) == 0) {
131 print STDERR "util::mv no destination directory given\n";
132 return;
133 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
134 print STDERR "util::mv if multiple source files are given the ".
135 "destination must be a directory\n";
136 return;
137 }
138
139 # move the files
140 foreach my $file (@srcfiles) {
141 my $tempdest = $dest;
142 if (-d $tempdest) {
143 my ($filename) = $file =~ /([^\\\/]+)$/;
144 $tempdest .= "/$filename";
145 }
146 if (!-e $file) {
147 print STDERR "util::mv $file does not exist\n";
148 } else {
149 rename ($file, $tempdest);
150 }
151 }
152}
153
154
155# copies a file or a group of files
156sub cp {
157 my $dest = pop (@_);
158 my (@srcfiles) = @_;
159
160 # remove trailing slashes from source and destination files
161 $dest =~ s/[\\\/]+$//;
162 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
163
164 # a few sanity checks
165 if (scalar (@srcfiles) == 0) {
166 print STDERR "util::cp no destination directory given\n";
167 return;
168 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
169 print STDERR "util::cp if multiple source files are given the ".
170 "destination must be a directory\n";
171 return;
172 }
173
174 # copy the files
175 foreach my $file (@srcfiles) {
176 my $tempdest = $dest;
177 if (-d $tempdest) {
178 my ($filename) = $file =~ /([^\\\/]+)$/;
179 $tempdest .= "/$filename";
180 }
181 if (!-e $file) {
182 print STDERR "util::cp $file does not exist\n";
183 } elsif (!-f $file) {
184 print STDERR "util::cp $file is not a plain file\n";
185 } else {
186 &File::Copy::copy ($file, $tempdest);
187 }
188 }
189}
190
191
192
193# recursively copies a file or group of files
194# syntax: cp_r (sourcefiles, destination directory)
195# destination must be a directory - to copy one file to
196# another use cp instead
197sub cp_r {
198 my $dest = pop (@_);
199 my (@srcfiles) = @_;
200
201 # a few sanity checks
202 if (scalar (@srcfiles) == 0) {
203 print STDERR "util::cp_r no destination directory given\n";
204 return;
205 } elsif (-f $dest) {
206 print STDERR "util::cp_r destination must be a directory\n";
207 return;
208 }
209
210 # create destination directory if it doesn't exist already
211 if (! -d $dest) {
212 my $store_umask = umask(0002);
213 mkdir ($dest, 0777);
214 umask($store_umask);
215 }
216
217 # copy the files
218 foreach my $file (@srcfiles) {
219
220 if (!-e $file) {
221 print STDERR "util::cp_r $file does not exist\n";
222
223 } elsif (-d $file) {
224 # make the new directory
225 my ($filename) = $file =~ /([^\\\/]*)$/;
226 $dest = &util::filename_cat ($dest, $filename);
227 my $store_umask = umask(0002);
228 mkdir ($dest, 0777);
229 umask($store_umask);
230
231 # get the contents of this directory
232 if (!opendir (INDIR, $file)) {
233 print STDERR "util::cp_r could not open directory $file\n";
234 } else {
235 my @filedir = readdir (INDIR);
236 closedir (INDIR);
237 foreach my $f (@filedir) {
238 next if $f =~ /^\.\.?$/;
239 # copy all the files in this directory
240 my $ff = &util::filename_cat ($file, $f);
241 &cp_r ($ff, $dest);
242 }
243 }
244
245 } else {
246 &cp($file, $dest);
247 }
248 }
249}
250
251# copies a directory and its contents, excluding subdirectories, into a new directory
252sub cp_r_toplevel {
253 my $dest = pop (@_);
254 my (@srcfiles) = @_;
255
256 # a few sanity checks
257 if (scalar (@srcfiles) == 0) {
258 print STDERR "util::cp_r no destination directory given\n";
259 return;
260 } elsif (-f $dest) {
261 print STDERR "util::cp_r destination must be a directory\n";
262 return;
263 }
264
265 # create destination directory if it doesn't exist already
266 if (! -d $dest) {
267 my $store_umask = umask(0002);
268 mkdir ($dest, 0777);
269 umask($store_umask);
270 }
271
272 # copy the files
273 foreach my $file (@srcfiles) {
274
275 if (!-e $file) {
276 print STDERR "util::cp_r $file does not exist\n";
277
278 } elsif (-d $file) {
279 # make the new directory
280 my ($filename) = $file =~ /([^\\\/]*)$/;
281 $dest = &util::filename_cat ($dest, $filename);
282 my $store_umask = umask(0002);
283 mkdir ($dest, 0777);
284 umask($store_umask);
285
286 # get the contents of this directory
287 if (!opendir (INDIR, $file)) {
288 print STDERR "util::cp_r could not open directory $file\n";
289 } else {
290 my @filedir = readdir (INDIR);
291 closedir (INDIR);
292 foreach my $f (@filedir) {
293 next if $f =~ /^\.\.?$/;
294
295 # copy all the files in this directory, but not directories
296 my $ff = &util::filename_cat ($file, $f);
297 if (-f $ff) {
298 &cp($ff, $dest);
299 #&cp_r ($ff, $dest);
300 }
301 }
302 }
303
304 } else {
305 &cp($file, $dest);
306 }
307 }
308}
309
310sub mk_dir {
311 my ($dir) = @_;
312
313 my $store_umask = umask(0002);
314 my $mkdir_ok = mkdir ($dir, 0777);
315 umask($store_umask);
316
317 if (!$mkdir_ok)
318 {
319 print STDERR "util::mk_dir could not create directory $dir\n";
320 return;
321 }
322}
323
324# in case anyone cares - I did some testing (using perls Benchmark module)
325# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
326# slightly faster (surprisingly) - Stefan.
327sub mk_all_dir {
328 my ($dir) = @_;
329
330 # use / for the directory separator, remove duplicate and
331 # trailing slashes
332 $dir=~s/[\\\/]+/\//g;
333 $dir=~s/[\\\/]+$//;
334
335 # make sure the cache directory exists
336 my $dirsofar = "";
337 my $first = 1;
338 foreach my $dirname (split ("/", $dir)) {
339 $dirsofar .= "/" unless $first;
340 $first = 0;
341
342 $dirsofar .= $dirname;
343
344 next if $dirname =~ /^(|[a-z]:)$/i;
345 if (!-e $dirsofar)
346 {
347 my $store_umask = umask(0002);
348 my $mkdir_ok = mkdir ($dirsofar, 0777);
349 umask($store_umask);
350 if (!$mkdir_ok)
351 {
352 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
353 return;
354 }
355 }
356 }
357}
358
359# make hard link to file if supported by OS, otherwise copy the file
360sub hard_link {
361 my ($src, $dest) = @_;
362
363 # remove trailing slashes from source and destination files
364 $src =~ s/[\\\/]+$//;
365 $dest =~ s/[\\\/]+$//;
366
367 # a few sanity checks
368 if (-e $dest) {
369 # destination file already exists
370 return;
371 }
372 elsif (!-e $src) {
373 print STDERR "util::hard_link source file $src does not exist\n";
374 return 1;
375 }
376 elsif (-d $src) {
377 print STDERR "util::hard_link source $src is a directory\n";
378 return 1;
379 }
380
381 my $dest_dir = &File::Basename::dirname($dest);
382 mk_all_dir($dest_dir) if (!-e $dest_dir);
383
384 # link not supported on windows 9x
385 if (($ENV{'GSDLOS'} =~ /^windows$/i) && (Win32::FsType() !~ /^ntfs$/i)) {
386 &File::Copy::copy ($src, $dest);
387
388 } elsif (!link($src, $dest)) {
389 print STDERR "util::hard_link: unable to create hard link. ";
390 print STDERR " Attempting to copy file: $src -> $dest\n";
391 &File::Copy::copy ($src, $dest);
392 }
393 return 0;
394}
395
396# make soft link to file if supported by OS, otherwise copy file
397sub soft_link {
398 my ($src, $dest, $ensure_paths_absolute) = @_;
399
400 # remove trailing slashes from source and destination files
401 $src =~ s/[\\\/]+$//;
402 $dest =~ s/[\\\/]+$//;
403
404 # Ensure file paths are absolute IF requested to do so
405 # Soft_linking didn't work for relative paths
406 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
407 # We need to ensure that the src file is the absolute path
408 # See http://perldoc.perl.org/File/Spec.html
409 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
410 $src = File::Spec->rel2abs($src); # make absolute
411 }
412 # Might as well ensure that the destination file's absolute path is used
413 if(!File::Spec->file_name_is_absolute( $dest )) {
414 $dest = File::Spec->rel2abs($dest); # make absolute
415 }
416 }
417
418 # a few sanity checks
419 if (!-e $src) {
420 print STDERR "util::soft_link source file $src does not exist\n";
421 return 0;
422 }
423
424 my $dest_dir = &File::Basename::dirname($dest);
425 mk_all_dir($dest_dir) if (!-e $dest_dir);
426
427 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
428 # symlink not supported on windows
429 &File::Copy::copy ($src, $dest);
430
431 } elsif (!eval {symlink($src, $dest)}) {
432 print STDERR "util::soft_link: unable to create soft link.\n";
433 return 0;
434 }
435
436 return 1;
437}
438
439
440
441
442# updates a copy of a directory in some other part of the filesystem
443# verbosity settings are: 0=low, 1=normal, 2=high
444# both $fromdir and $todir should be absolute paths
445sub cachedir {
446 my ($fromdir, $todir, $verbosity) = @_;
447 $verbosity = 1 unless defined $verbosity;
448
449 # use / for the directory separator, remove duplicate and
450 # trailing slashes
451 $fromdir=~s/[\\\/]+/\//g;
452 $fromdir=~s/[\\\/]+$//;
453 $todir=~s/[\\\/]+/\//g;
454 $todir=~s/[\\\/]+$//;
455
456 &mk_all_dir ($todir);
457
458 # get the directories in ascending order
459 if (!opendir (FROMDIR, $fromdir)) {
460 print STDERR "util::cachedir could not read directory $fromdir\n";
461 return;
462 }
463 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
464 closedir (FROMDIR);
465
466 if (!opendir (TODIR, $todir)) {
467 print STDERR "util::cacedir could not read directory $todir\n";
468 return;
469 }
470 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
471 closedir (TODIR);
472
473 my $fromi = 0;
474 my $toi = 0;
475
476 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
477# print "fromi: $fromi toi: $toi\n";
478
479 # see if we should delete a file/directory
480 # this should happen if the file/directory
481 # is not in the from list or if its a different
482 # size, or has an older timestamp
483 if ($toi < scalar(@todir)) {
484 if (($fromi >= scalar(@fromdir)) ||
485 ($todir[$toi] lt $fromdir[$fromi] ||
486 ($todir[$toi] eq $fromdir[$fromi] &&
487 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
488 $verbosity)))) {
489
490 # the files are different
491 &rm_r("$todir/$todir[$toi]");
492 splice(@todir, $toi, 1); # $toi stays the same
493
494 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
495 # the files are the same
496 # if it is a directory, check its contents
497 if (-d "$todir/$todir[$toi]") {
498 &cachedir ("$fromdir/$fromdir[$fromi]",
499 "$todir/$todir[$toi]", $verbosity);
500 }
501
502 $toi++;
503 $fromi++;
504 next;
505 }
506 }
507
508 # see if we should insert a file/directory
509 # we should insert a file/directory if there
510 # is no tofiles left or if the tofile does not exist
511 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
512 $todir[$toi] gt $fromdir[$fromi])) {
513 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
514 splice (@todir, $toi, 0, $fromdir[$fromi]);
515
516 $toi++;
517 $fromi++;
518 }
519 }
520}
521
522# this function returns -1 if either file is not found
523# assumes that $file1 and $file2 are absolute file names or
524# in the current directory
525# $file2 is allowed to be newer than $file1
526sub differentfiles {
527 my ($file1, $file2, $verbosity) = @_;
528 $verbosity = 1 unless defined $verbosity;
529
530 $file1 =~ s/\/+$//;
531 $file2 =~ s/\/+$//;
532
533 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
534 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
535
536 return -1 unless (-e $file1 && -e $file2);
537 if ($file1name ne $file2name) {
538 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
539 return 1;
540 }
541
542 my @file1stat = stat ($file1);
543 my @file2stat = stat ($file2);
544
545 if (-d $file1) {
546 if (! -d $file2) {
547 print STDERR "one file is a directory\n" if ($verbosity >= 2);
548 return 1;
549 }
550 return 0;
551 }
552
553 # both must be regular files
554 unless (-f $file1 && -f $file2) {
555 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
556 return 1;
557 }
558
559 # the size of the files must be the same
560 if ($file1stat[7] != $file2stat[7]) {
561 print STDERR "different sized files\n" if ($verbosity >= 2);
562 return 1;
563 }
564
565 # the second file cannot be older than the first
566 if ($file1stat[9] > $file2stat[9]) {
567 print STDERR "file is older\n" if ($verbosity >= 2);
568 return 1;
569 }
570
571 return 0;
572}
573
574
575sub get_tmp_filename
576{
577 my $file_ext = shift(@_) || undef;
578
579 my $opt_dot_file_ext = (defined $file_ext) ? ".$file_ext" : "";
580
581 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
582 &mk_all_dir ($tmpdir) unless -e $tmpdir;
583
584 my $count = 1000;
585 my $rand = int(rand $count);
586 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
587
588 while (-e $full_tmp_filename) {
589 $rand = int(rand $count);
590 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
591 $count++;
592 }
593
594 return $full_tmp_filename;
595}
596
597sub filename_to_regex {
598 my $filename = shift (@_);
599
600 # need to put single backslash back to double so that regex works
601 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
602 $filename =~ s/\\/\\\\/g;
603 }
604 return $filename;
605}
606
607sub filename_cat {
608 my $first_file = shift(@_);
609 my (@filenames) = @_;
610
611# Useful for debugging
612# -- might make sense to call caller(0) rather than (1)??
613# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
614# print STDERR "Calling method; $cfilename:$cline $cpackage->$csubr\n";
615
616 # Check for empty first filename
617 if ($first_file =~ /\S/) {
618 unshift(@filenames, $first_file);
619 }
620
621 my $filename = join("/", @filenames);
622
623 # remove duplicate slashes and remove the last slash
624 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
625 $filename =~ s/[\\\/]+/\\/g;
626 } else {
627 $filename =~ s/[\/]+/\//g;
628 # DB: want a filename abc\de.html to remain like this
629 }
630 $filename =~ s/[\\\/]$//;
631
632 return $filename;
633}
634
635
636sub envvar_prepend {
637 my ($var,$val) = @_;
638
639 # do not prepend any value/path that's already in the environment variable
640 if ($ENV{'GSDLOS'} =~ /^windows$/i)
641 {
642 my $escaped_val = $val;
643 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
644 if($ENV{$var} !~ m/$escaped_val/) {
645 $ENV{$var} = "$val;".$ENV{$var};
646 }
647 }
648 else {
649 if($ENV{$var} !~ m/$val/) {
650 $ENV{$var} = "$val:".$ENV{$var};
651 }
652 }
653}
654
655sub envvar_append {
656 my ($var,$val) = @_;
657
658 # do not append any value/path that's already in the environment variable
659 if ($ENV{'GSDLOS'} =~ /^windows$/i)
660 {
661 my $escaped_val = $val;
662 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
663 if($ENV{$var} !~ m/$escaped_val/) {
664 $ENV{$var} .= ";$val";
665 }
666 }
667 else {
668 if($ENV{$var} !~ m/$val/) {
669 $ENV{$var} .= ":$val";
670 }
671 }
672}
673
674
675# splits a filename into a prefix and a tail extension using the tail_re, or
676# if that fails, splits on the file_extension . (dot)
677sub get_prefix_and_tail_by_regex {
678
679 my ($filename,$tail_re) = @_;
680
681 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
682 if ((!defined $file_prefix) || (!defined $file_ext)) {
683 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
684 }
685
686 return ($file_prefix,$file_ext);
687}
688
689# get full path and file only path from a base_dir (which may be empty) and
690# file (which may contain directories)
691sub get_full_filenames {
692 my ($base_dir, $file) = @_;
693
694 my $filename_full_path = $file;
695 # add on directory if present
696 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
697
698 my $filename_no_path = $file;
699
700 # remove directory if present
701 $filename_no_path =~ s/^.*[\/\\]//;
702 return ($filename_full_path, $filename_no_path);
703}
704
705# returns the path of a file without the filename -- ie. the directory the file is in
706sub filename_head {
707 my $filename = shift(@_);
708
709 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
710 $filename =~ s/[^\\\\]*$//;
711 }
712 else {
713 $filename =~ s/[^\\\/]*$//;
714 }
715
716 return $filename;
717}
718
719
720# returns 1 if filename1 and filename2 point to the same
721# file or directory
722sub filenames_equal {
723 my ($filename1, $filename2) = @_;
724
725 # use filename_cat to clean up trailing slashes and
726 # multiple slashes
727 $filename1 = filename_cat ($filename1);
728 $filename2 = filename_cat ($filename2);
729
730 # filenames not case sensitive on windows
731 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
732 $filename1 =~ tr/[A-Z]/[a-z]/;
733 $filename2 =~ tr/[A-Z]/[a-z]/;
734 }
735 return 1 if $filename1 eq $filename2;
736 return 0;
737}
738
739sub filename_within_collection
740{
741 my ($filename) = @_;
742
743 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
744
745 if (defined $collect_dir) {
746 my $dirsep = &util::get_dirsep();
747 if ($collect_dir !~ m/$dirsep$/) {
748 $collect_dir .= $dirsep;
749 }
750
751 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
752
753 # if from within GSDLCOLLECTDIR, then remove directory prefix
754 # so source_filename is realative to it. This is done to aid
755 # portability, i.e. the collection can be moved to somewhere
756 # else on the file system and the archives directory will still
757 # work. This is needed, for example in the applet version of
758 # GLI where GSDLHOME/collect on the server will be different to
759 # the collect directory of the remove user. Of course,
760 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
761 # it back into a full pathname.
762
763 if ($filename =~ /^$collect_dir(.*)$/) {
764 $filename = $1;
765 }
766 }
767
768 return $filename;
769}
770
771## @method make_absolute()
772#
773# Ensure the given file path is absolute in respect to the given base path.
774#
775# @param $base_dir A string denoting the base path the given dir must be
776# absolute to.
777# @param $dir The directory to be made absolute as a string. Note that the
778# dir may already be absolute, in which case it will remain
779# unchanged.
780# @return The now absolute form of the directory as a string.
781#
782# @author John Thompson, DL Consulting Ltd.
783# @copy 2006 DL Consulting Ltd.
784#
785#used in buildcol.pl, doesn't work for all cases --kjdon
786sub make_absolute {
787
788 my ($base_dir, $dir) = @_;
789 print STDERR "dir = $dir\n";
790 $dir =~ s/[\\\/]+/\//g;
791 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
792 $dir =~ s|^/tmp_mnt||;
793 1 while($dir =~ s|/[^/]*/\.\./|/|g);
794 $dir =~ s|/[.][.]?/|/|g;
795 $dir =~ tr|/|/|s;
796 print STDERR "dir = $dir\n";
797
798 return $dir;
799}
800## make_absolute() ##
801
802sub get_dirsep {
803
804 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
805 return "\\";
806 } else {
807 return "\/";
808 }
809}
810
811sub get_os_dirsep {
812
813 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
814 return "\\\\";
815 } else {
816 return "\\\/";
817 }
818}
819
820sub get_re_dirsep {
821
822 return "\\\\|\\\/";
823}
824
825
826sub get_dirsep_tail {
827 my ($filename) = @_;
828
829 # returns last part of directory or filename
830 # On unix e.g. a/b.d => b.d
831 # a/b/c => c
832
833 my $dirsep = get_re_dirsep();
834 my @dirs = split (/$dirsep/, $filename);
835 my $tail = pop @dirs;
836
837 # - caused problems under windows
838 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
839
840 return $tail;
841}
842
843
844# if this is running on windows we want binaries to end in
845# .exe, otherwise they don't have to end in any extension
846sub get_os_exe {
847 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
848 return "";
849}
850
851
852# test to see whether this is a big or little endian machine
853sub is_little_endian
854{
855 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
856 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
857 # Otherwise, it's little endian
858
859 #return 0 if $^O =~ /^darwin$/i;
860 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
861
862 # Going back to stating exactly whether the machine is little endian
863 # or big endian, without any special case for Macs. Since for rata it comes
864 # back with little endian and for shuttle with bigendian.
865 return (ord(substr(pack("s",1), 0, 1)) == 1);
866}
867
868
869# will return the collection name if successful, "" otherwise
870sub use_collection {
871 my ($collection, $collectdir) = @_;
872
873 if (!defined $collectdir || $collectdir eq "") {
874 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
875 }
876
877 # get and check the collection
878 if (!defined($collection) || $collection eq "") {
879 if (defined $ENV{'GSDLCOLLECTION'}) {
880 $collection = $ENV{'GSDLCOLLECTION'};
881 } else {
882 print STDOUT "No collection specified\n";
883 return "";
884 }
885 }
886
887 if ($collection eq "modelcol") {
888 print STDOUT "You can't use modelcol.\n";
889 return "";
890 }
891
892 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
893 # are defined
894 $ENV{'GSDLCOLLECTION'} = $collection;
895 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
896
897 # make sure this collection exists
898 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
899 print STDOUT "Invalid collection ($collection).\n";
900 return "";
901 }
902
903 # everything is ready to go
904 return $collection;
905}
906
907
908
909
910# will return the collection name if successful, "" otherwise.
911# Like use_collection (above) but for greenstone 3 (taking account of site level)
912
913sub use_site_collection {
914 my ($site, $collection, $collectdir) = @_;
915
916 if (!defined $collectdir || $collectdir eq "") {
917 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
918 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
919 }
920
921 # collectdir explicitly set by this point (using $site variable if required).
922 # Can call "old" gsdl2 use_collection now.
923
924 return use_collection($collection,$collectdir);
925}
926
927
928
929sub locate_config_file
930{
931 my ($file) = @_;
932
933 my $locations = locate_config_files($file);
934
935 return shift @$locations; # returns undef if 'locations' is empty
936}
937
938
939sub locate_config_files
940{
941 my ($file) = @_;
942
943 my @locations = ();
944
945 if (-e $file) {
946 # Clearly specified (most likely full filename)
947 # No need to hunt in 'etc' directories, return value unchanged
948 push(@locations,$file);
949 }
950 else {
951 # Check for collection specific one before looking in global GSDL 'etc'
952 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
953 my $test_collect_etc_filename
954 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
955
956 if (-e $test_collect_etc_filename) {
957 push(@locations,$test_collect_etc_filename);
958 }
959 }
960 my $test_main_etc_filename
961 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
962 if (-e $test_main_etc_filename) {
963 push(@locations,$test_main_etc_filename);
964 }
965 }
966
967 return \@locations;
968}
969
970
971sub hyperlink_text
972{
973 my ($text) = @_;
974
975 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
976 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
977
978 return $text;
979}
980
981
982# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
983# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
984sub is_dir_empty
985{
986 my ($path) = @_;
987 opendir DIR, $path;
988 while(my $entry = readdir DIR) {
989 next if($entry =~ /^\.\.?$/);
990 closedir DIR;
991 return 0;
992 }
993 closedir DIR;
994 return 1;
995}
996
997# Returns the given filename converted using either URL encoding or base64
998# encoding, as specified by $rename_method. If the given filename has no suffix
999# (if it is just the tailname), then $no_suffix should be some defined value.
1000sub rename_file {
1001 my ($filename, $rename_method, $no_suffix) = @_;
1002
1003 if(!$filename) { # undefined or empty string
1004 return $filename;
1005 }
1006
1007 # Replace spaces with underscore.
1008 # Do this first else it can go wrong below when getting tailname
1009 $filename =~ s/ /_/g;
1010
1011 # Should we do this????
1012 # DM safing would have replaced underscores with character entity &#095;
1013 $filename =~ s/&\#095;/_/g;
1014
1015 my ($tailname,$dirname,$suffix);
1016 if($no_suffix) { # given a tailname, no suffix
1017 $suffix = "";
1018 ($tailname,$dirname) = File::Basename::fileparse($filename);
1019 }
1020 else {
1021 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1022 }
1023
1024 if (!$rename_method) {
1025 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1026 # Debugging information
1027 my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1028 print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
1029 }
1030
1031 if (!$rename_method || $rename_method eq "url") {
1032 $tailname = &unicode::url_encode($tailname);
1033 }
1034 elsif ($rename_method eq "base64") {
1035 $tailname = &MIME::Base64::encode_base64($tailname);
1036 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1037 }
1038
1039 $filename = "$tailname$suffix";
1040 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1041
1042 return $filename;
1043}
1044
10451;
Note: See TracBrowser for help on using the repository browser.