source: main/trunk/greenstone2/perllib/util.pm@ 25572

Last change on this file since 25572 was 25572, checked in by ak19, 12 years ago

Newly added mv_dir_contents subroutine was not complete: it did not move the directory contents of subdirectories which I thought was implicit. Added explicit recursion on subfolders and handling of special cases where folders are empty.

  • Property svn:keywords set to Author Date Id Revision
File size: 49.8 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41 my (@files) = @_;
42
43 my @filefiles = ();
44
45 # make sure the files we want to delete exist
46 # and are regular files
47 foreach my $file (@files) {
48 if (!-e $file) {
49 print STDERR "util::rm $file does not exist\n";
50 } elsif ((!-f $file) && (!-l $file)) {
51 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52 } else {
53 push (@filefiles, $file);
54 }
55 }
56
57 # remove the files
58 my $numremoved = unlink @filefiles;
59
60 # check to make sure all of them were removed
61 if ($numremoved != scalar(@filefiles)) {
62 print STDERR "util::rm Not all files were removed\n";
63 }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine when debugging the deletion of individual files.
68sub rm_debug {
69 my (@files) = @_;
70 my @filefiles = ();
71
72 # make sure the files we want to delete exist
73 # and are regular files
74 foreach my $file (@files) {
75 if (!-e $file) {
76 print STDERR "util::rm $file does not exist\n";
77 } elsif ((!-f $file) && (!-l $file)) {
78 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79 } else { # debug message
80 unlink($file) or warn "Could not delete file $file: $!\n";
81 }
82 }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88 my ($files,$file_accept_re,$file_reject_re) = @_;
89
90# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92# print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93
94 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96 # recursively remove the files
97 foreach my $file (@files_array) {
98 $file =~ s/[\/\\]+$//; # remove trailing slashes
99
100 if (!-e $file) {
101 print STDERR "util::filtered_rm_r $file does not exist\n";
102
103 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104 # get the contents of this directory
105 if (!opendir (INDIR, $file)) {
106 print STDERR "util::filtered_rm_r could not open directory $file\n";
107 } else {
108 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109 closedir (INDIR);
110
111 # remove all the files in this directory
112 map {$_="$file/$_";} @filedir;
113 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115 if (!defined $file_accept_re && !defined $file_reject_re) {
116 # remove this directory
117 if (!rmdir $file) {
118 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119 }
120 }
121 }
122 } else {
123 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126 # remove this file
127 &rm ($file);
128 }
129 }
130 }
131}
132
133
134# recursive removal
135sub rm_r {
136 my (@files) = @_;
137
138 # use the more general (but reterospectively written function
139 # filtered_rm_r function()
140
141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149 my $dest = pop (@_);
150 my (@srcfiles) = @_;
151
152 # remove trailing slashes from source and destination files
153 $dest =~ s/[\\\/]+$//;
154 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156 # a few sanity checks
157 if (scalar (@srcfiles) == 0) {
158 print STDERR "util::mv no destination directory given\n";
159 return;
160 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161 print STDERR "util::mv if multiple source files are given the ".
162 "destination must be a directory\n";
163 return;
164 }
165
166 # move the files
167 foreach my $file (@srcfiles) {
168 my $tempdest = $dest;
169 if (-d $tempdest) {
170 my ($filename) = $file =~ /([^\\\/]+)$/;
171 $tempdest .= "/$filename";
172 }
173 if (!-e $file) {
174 print STDERR "util::mv $file does not exist\n";
175 } else {
176 rename ($file, $tempdest);
177 }
178 }
179}
180
181# Move the contents of source directory into target directory
182# (as opposed to merely replacing target dir with the src dir)
183# This can overwrite any files with duplicate names in the target
184# but other files and folders in the target will continue to exist
185sub mv_dir_contents {
186 my ($src_dir, $dest_dir) = @_;
187
188 # Obtain listing of all files within src_dir
189 # Note that readdir lists relative paths, as well as . and ..
190 opendir(DIR, "$src_dir");
191 my @files= readdir(DIR);
192 close(DIR);
193
194 # construct absolute paths
195 my @full_path_files = ();
196 foreach my $file (@files) {
197 # process all except . and ..
198 unless($file eq "." || $file eq "..") {
199 my $tailname = $file;
200 $file = &filename_cat($src_dir, $file);
201
202 if(-d $file) { # recursion on directories
203 #print STDERR "**** $file is a directory, to be copied to $dest_dir\\$tailname\n";
204 &mv_dir_contents($file, &filename_cat($dest_dir, $tailname));
205
206 # now all content is moved across, delete empty dir in source folder
207 if(&is_dir_empty($file)) {
208 &rm_r($file);
209 } else { # error
210 print STDERR "ERROR. util::mv_dir_contents: subfolder $file still non-empty after moving contents to $dest_dir\n";
211 }
212 } else { # process files with a simple move
213 push(@full_path_files, $file);
214 }
215 }
216 }
217
218 if(!&dir_exists($dest_dir)) { # create target toplevel folder or subfolders if they don't exist
219 &mk_dir($dest_dir);
220 }
221
222 #print STDERR "@@@@@ Copying files to: $dest_dir\n";
223 if(@full_path_files) { # if non-empty, there's something to copy across
224 &mv(@full_path_files, $dest_dir);
225 }
226}
227
228
229# copies a file or a group of files
230sub cp {
231 my $dest = pop (@_);
232 my (@srcfiles) = @_;
233
234 # remove trailing slashes from source and destination files
235 $dest =~ s/[\\\/]+$//;
236 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
237
238 # a few sanity checks
239 if (scalar (@srcfiles) == 0) {
240 print STDERR "util::cp no destination directory given\n";
241 return;
242 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
243 print STDERR "util::cp if multiple source files are given the ".
244 "destination must be a directory\n";
245 return;
246 }
247
248 # copy the files
249 foreach my $file (@srcfiles) {
250 my $tempdest = $dest;
251 if (-d $tempdest) {
252 my ($filename) = $file =~ /([^\\\/]+)$/;
253 $tempdest .= "/$filename";
254 }
255 if (!-e $file) {
256 print STDERR "util::cp $file does not exist\n";
257 } elsif (!-f $file) {
258 print STDERR "util::cp $file is not a plain file\n";
259 } else {
260 &File::Copy::copy ($file, $tempdest);
261 }
262 }
263}
264
265
266
267# recursively copies a file or group of files
268# syntax: cp_r (sourcefiles, destination directory)
269# destination must be a directory - to copy one file to
270# another use cp instead
271sub cp_r {
272 my $dest = pop (@_);
273 my (@srcfiles) = @_;
274
275 # a few sanity checks
276 if (scalar (@srcfiles) == 0) {
277 print STDERR "util::cp_r no destination directory given\n";
278 return;
279 } elsif (-f $dest) {
280 print STDERR "util::cp_r destination must be a directory\n";
281 return;
282 }
283
284 # create destination directory if it doesn't exist already
285 if (! -d $dest) {
286 my $store_umask = umask(0002);
287 mkdir ($dest, 0777);
288 umask($store_umask);
289 }
290
291 # copy the files
292 foreach my $file (@srcfiles) {
293
294 if (!-e $file) {
295 print STDERR "util::cp_r $file does not exist\n";
296
297 } elsif (-d $file) {
298 # make the new directory
299 my ($filename) = $file =~ /([^\\\/]*)$/;
300 $dest = &util::filename_cat ($dest, $filename);
301 my $store_umask = umask(0002);
302 mkdir ($dest, 0777);
303 umask($store_umask);
304
305 # get the contents of this directory
306 if (!opendir (INDIR, $file)) {
307 print STDERR "util::cp_r could not open directory $file\n";
308 } else {
309 my @filedir = readdir (INDIR);
310 closedir (INDIR);
311 foreach my $f (@filedir) {
312 next if $f =~ /^\.\.?$/;
313 # copy all the files in this directory
314 my $ff = &util::filename_cat ($file, $f);
315 &cp_r ($ff, $dest);
316 }
317 }
318
319 } else {
320 &cp($file, $dest);
321 }
322 }
323}
324# recursively copies a file or group of files
325# syntax: cp_r (sourcefiles, destination directory)
326# destination must be a directory - to copy one file to
327# another use cp instead
328sub cp_r_nosvn {
329 my $dest = pop (@_);
330 my (@srcfiles) = @_;
331
332 # a few sanity checks
333 if (scalar (@srcfiles) == 0) {
334 print STDERR "util::cp_r no destination directory given\n";
335 return;
336 } elsif (-f $dest) {
337 print STDERR "util::cp_r destination must be a directory\n";
338 return;
339 }
340
341 # create destination directory if it doesn't exist already
342 if (! -d $dest) {
343 my $store_umask = umask(0002);
344 mkdir ($dest, 0777);
345 umask($store_umask);
346 }
347
348 # copy the files
349 foreach my $file (@srcfiles) {
350
351 if (!-e $file) {
352 print STDERR "util::cp_r $file does not exist\n";
353
354 } elsif (-d $file) {
355 # make the new directory
356 my ($filename) = $file =~ /([^\\\/]*)$/;
357 $dest = &util::filename_cat ($dest, $filename);
358 my $store_umask = umask(0002);
359 mkdir ($dest, 0777);
360 umask($store_umask);
361
362 # get the contents of this directory
363 if (!opendir (INDIR, $file)) {
364 print STDERR "util::cp_r could not open directory $file\n";
365 } else {
366 my @filedir = readdir (INDIR);
367 closedir (INDIR);
368 foreach my $f (@filedir) {
369 next if $f =~ /^\.\.?$/;
370 next if $f =~ /^\.svn$/;
371 # copy all the files in this directory
372 my $ff = &util::filename_cat ($file, $f);
373 &cp_r ($ff, $dest);
374 }
375 }
376
377 } else {
378 &cp($file, $dest);
379 }
380 }
381}
382
383# copies a directory and its contents, excluding subdirectories, into a new directory
384sub cp_r_toplevel {
385 my $dest = pop (@_);
386 my (@srcfiles) = @_;
387
388 # a few sanity checks
389 if (scalar (@srcfiles) == 0) {
390 print STDERR "util::cp_r no destination directory given\n";
391 return;
392 } elsif (-f $dest) {
393 print STDERR "util::cp_r destination must be a directory\n";
394 return;
395 }
396
397 # create destination directory if it doesn't exist already
398 if (! -d $dest) {
399 my $store_umask = umask(0002);
400 mkdir ($dest, 0777);
401 umask($store_umask);
402 }
403
404 # copy the files
405 foreach my $file (@srcfiles) {
406
407 if (!-e $file) {
408 print STDERR "util::cp_r $file does not exist\n";
409
410 } elsif (-d $file) {
411 # make the new directory
412 my ($filename) = $file =~ /([^\\\/]*)$/;
413 $dest = &util::filename_cat ($dest, $filename);
414 my $store_umask = umask(0002);
415 mkdir ($dest, 0777);
416 umask($store_umask);
417
418 # get the contents of this directory
419 if (!opendir (INDIR, $file)) {
420 print STDERR "util::cp_r could not open directory $file\n";
421 } else {
422 my @filedir = readdir (INDIR);
423 closedir (INDIR);
424 foreach my $f (@filedir) {
425 next if $f =~ /^\.\.?$/;
426
427 # copy all the files in this directory, but not directories
428 my $ff = &util::filename_cat ($file, $f);
429 if (-f $ff) {
430 &cp($ff, $dest);
431 #&cp_r ($ff, $dest);
432 }
433 }
434 }
435
436 } else {
437 &cp($file, $dest);
438 }
439 }
440}
441
442sub mk_dir {
443 my ($dir) = @_;
444
445 my $store_umask = umask(0002);
446 my $mkdir_ok = mkdir ($dir, 0777);
447 umask($store_umask);
448
449 if (!$mkdir_ok)
450 {
451 print STDERR "util::mk_dir could not create directory $dir\n";
452 return;
453 }
454}
455
456# in case anyone cares - I did some testing (using perls Benchmark module)
457# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
458# slightly faster (surprisingly) - Stefan.
459sub mk_all_dir {
460 my ($dir) = @_;
461
462 # use / for the directory separator, remove duplicate and
463 # trailing slashes
464 $dir=~s/[\\\/]+/\//g;
465 $dir=~s/[\\\/]+$//;
466
467 # make sure the cache directory exists
468 my $dirsofar = "";
469 my $first = 1;
470 foreach my $dirname (split ("/", $dir)) {
471 $dirsofar .= "/" unless $first;
472 $first = 0;
473
474 $dirsofar .= $dirname;
475
476 next if $dirname =~ /^(|[a-z]:)$/i;
477 if (!-e $dirsofar)
478 {
479 my $store_umask = umask(0002);
480 my $mkdir_ok = mkdir ($dirsofar, 0777);
481 umask($store_umask);
482 if (!$mkdir_ok)
483 {
484 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
485 return;
486 }
487 }
488 }
489}
490
491# make hard link to file if supported by OS, otherwise copy the file
492sub hard_link {
493 my ($src, $dest, $verbosity) = @_;
494
495 # remove trailing slashes from source and destination files
496 $src =~ s/[\\\/]+$//;
497 $dest =~ s/[\\\/]+$//;
498
499## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
500 # a few sanity checks
501 if (-e $dest) {
502 # destination file already exists
503 return;
504 }
505 elsif (!-e $src) {
506 print STDERR "util::hard_link source file \"$src\" does not exist\n";
507 return 1;
508 }
509 elsif (-d $src) {
510 print STDERR "util::hard_link source \"$src\" is a directory\n";
511 return 1;
512 }
513
514 my $dest_dir = &File::Basename::dirname($dest);
515 mk_all_dir($dest_dir) if (!-e $dest_dir);
516
517
518 if (!link($src, $dest)) {
519 if ((!defined $verbosity) || ($verbosity>2)) {
520 print STDERR "util::hard_link: unable to create hard link. ";
521 print STDERR " Copying file: $src -> $dest\n";
522 }
523 &File::Copy::copy ($src, $dest);
524 }
525 return 0;
526}
527
528# make soft link to file if supported by OS, otherwise copy file
529sub soft_link {
530 my ($src, $dest, $ensure_paths_absolute) = @_;
531
532 # remove trailing slashes from source and destination files
533 $src =~ s/[\\\/]+$//;
534 $dest =~ s/[\\\/]+$//;
535
536 # Ensure file paths are absolute IF requested to do so
537 # Soft_linking didn't work for relative paths
538 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
539 # We need to ensure that the src file is the absolute path
540 # See http://perldoc.perl.org/File/Spec.html
541 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
542 $src = File::Spec->rel2abs($src); # make absolute
543 }
544 # Might as well ensure that the destination file's absolute path is used
545 if(!File::Spec->file_name_is_absolute( $dest )) {
546 $dest = File::Spec->rel2abs($dest); # make absolute
547 }
548 }
549
550 # a few sanity checks
551 if (!-e $src) {
552 print STDERR "util::soft_link source file $src does not exist\n";
553 return 0;
554 }
555
556 my $dest_dir = &File::Basename::dirname($dest);
557 mk_all_dir($dest_dir) if (!-e $dest_dir);
558
559 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
560
561 # symlink not supported on windows
562 &File::Copy::copy ($src, $dest);
563
564 } elsif (!eval {symlink($src, $dest)}) {
565 print STDERR "util::soft_link: unable to create soft link.\n";
566 return 0;
567 }
568
569 return 1;
570}
571
572# Primarily for filenames generated by processing
573# content of HTML files (which are mapped to UTF-8 internally)
574#
575# To turn this into an octet string that really exists on the file
576# system:
577# 1. don't need to do anything special for Unix-based systems
578# (as underlying file system is byte-code)
579# 2. need to map to short DOS filenames for Windows
580
581sub utf8_to_real_filename
582{
583 my ($utf8_filename) = @_;
584
585 my $real_filename;
586
587 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
588 require Win32;
589
590 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
591
592 my $unicode_filename = decode("utf8",$utf8_filename);
593 $real_filename = Win32::GetShortPathName($unicode_filename);
594 }
595 else {
596 $real_filename = $utf8_filename;
597 }
598
599 return $real_filename;
600}
601
602
603sub fd_exists
604{
605 my $filename_full_path = shift @_;
606 my $test_op = shift @_ || "-e";
607
608 # By default tests for existance of file or directory (-e)
609 # Can be made more specific by providing second parameter (e.g. -f or -d)
610
611 my $exists = 0;
612
613 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
614 require Win32;
615 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
616 if (!defined $filename_short_path) {
617 # Was probably still in UTF8 form (not what is needed on Windows)
618 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
619 if (defined $unicode_filename_full_path) {
620 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
621 }
622 }
623 $filename_full_path = $filename_short_path;
624 }
625
626 if (defined $filename_full_path) {
627 $exists = eval "($test_op \$filename_full_path)";
628 }
629
630 return $exists;
631}
632
633sub file_exists
634{
635 my ($filename_full_path) = @_;
636
637 return fd_exists($filename_full_path,"-f");
638}
639
640sub dir_exists
641{
642 my ($filename_full_path) = @_;
643
644 return fd_exists($filename_full_path,"-d");
645}
646
647
648
649# updates a copy of a directory in some other part of the filesystem
650# verbosity settings are: 0=low, 1=normal, 2=high
651# both $fromdir and $todir should be absolute paths
652sub cachedir {
653 my ($fromdir, $todir, $verbosity) = @_;
654 $verbosity = 1 unless defined $verbosity;
655
656 # use / for the directory separator, remove duplicate and
657 # trailing slashes
658 $fromdir=~s/[\\\/]+/\//g;
659 $fromdir=~s/[\\\/]+$//;
660 $todir=~s/[\\\/]+/\//g;
661 $todir=~s/[\\\/]+$//;
662
663 &mk_all_dir ($todir);
664
665 # get the directories in ascending order
666 if (!opendir (FROMDIR, $fromdir)) {
667 print STDERR "util::cachedir could not read directory $fromdir\n";
668 return;
669 }
670 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
671 closedir (FROMDIR);
672
673 if (!opendir (TODIR, $todir)) {
674 print STDERR "util::cacedir could not read directory $todir\n";
675 return;
676 }
677 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
678 closedir (TODIR);
679
680 my $fromi = 0;
681 my $toi = 0;
682
683 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
684# print "fromi: $fromi toi: $toi\n";
685
686 # see if we should delete a file/directory
687 # this should happen if the file/directory
688 # is not in the from list or if its a different
689 # size, or has an older timestamp
690 if ($toi < scalar(@todir)) {
691 if (($fromi >= scalar(@fromdir)) ||
692 ($todir[$toi] lt $fromdir[$fromi] ||
693 ($todir[$toi] eq $fromdir[$fromi] &&
694 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
695 $verbosity)))) {
696
697 # the files are different
698 &rm_r("$todir/$todir[$toi]");
699 splice(@todir, $toi, 1); # $toi stays the same
700
701 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
702 # the files are the same
703 # if it is a directory, check its contents
704 if (-d "$todir/$todir[$toi]") {
705 &cachedir ("$fromdir/$fromdir[$fromi]",
706 "$todir/$todir[$toi]", $verbosity);
707 }
708
709 $toi++;
710 $fromi++;
711 next;
712 }
713 }
714
715 # see if we should insert a file/directory
716 # we should insert a file/directory if there
717 # is no tofiles left or if the tofile does not exist
718 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
719 $todir[$toi] gt $fromdir[$fromi])) {
720 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
721 splice (@todir, $toi, 0, $fromdir[$fromi]);
722
723 $toi++;
724 $fromi++;
725 }
726 }
727}
728
729# this function returns -1 if either file is not found
730# assumes that $file1 and $file2 are absolute file names or
731# in the current directory
732# $file2 is allowed to be newer than $file1
733sub differentfiles {
734 my ($file1, $file2, $verbosity) = @_;
735 $verbosity = 1 unless defined $verbosity;
736
737 $file1 =~ s/\/+$//;
738 $file2 =~ s/\/+$//;
739
740 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
741 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
742
743 return -1 unless (-e $file1 && -e $file2);
744 if ($file1name ne $file2name) {
745 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
746 return 1;
747 }
748
749 my @file1stat = stat ($file1);
750 my @file2stat = stat ($file2);
751
752 if (-d $file1) {
753 if (! -d $file2) {
754 print STDERR "one file is a directory\n" if ($verbosity >= 2);
755 return 1;
756 }
757 return 0;
758 }
759
760 # both must be regular files
761 unless (-f $file1 && -f $file2) {
762 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
763 return 1;
764 }
765
766 # the size of the files must be the same
767 if ($file1stat[7] != $file2stat[7]) {
768 print STDERR "different sized files\n" if ($verbosity >= 2);
769 return 1;
770 }
771
772 # the second file cannot be older than the first
773 if ($file1stat[9] > $file2stat[9]) {
774 print STDERR "file is older\n" if ($verbosity >= 2);
775 return 1;
776 }
777
778 return 0;
779}
780
781
782sub get_tmp_filename
783{
784 my $file_ext = shift(@_) || undef;
785
786 my $opt_dot_file_ext = "";
787 if (defined $file_ext) {
788 if ($file_ext !~ m/\./) {
789 # no dot, so needs one added in at start
790 $opt_dot_file_ext = ".$file_ext"
791 }
792 else {
793 # allow for "extensions" such as _metadata.txt to be handled
794 # gracefully
795 $opt_dot_file_ext = $file_ext;
796 }
797 }
798
799 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
800 &mk_all_dir ($tmpdir) unless -e $tmpdir;
801
802 my $count = 1000;
803 my $rand = int(rand $count);
804 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
805
806 while (-e $full_tmp_filename) {
807 $rand = int(rand $count);
808 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
809 $count++;
810 }
811
812 return $full_tmp_filename;
813}
814
815sub get_timestamped_tmp_folder
816{
817
818 my $tmp_dirname;
819 if(defined $ENV{'GSDLCOLLECTDIR'}) {
820 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
821 } elsif(defined $ENV{'GSDLHOME'}) {
822 $tmp_dirname = $ENV{'GSDLHOME'};
823 } else {
824 return undef;
825 }
826
827 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
828 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
829
830 # add the timestamp into the path otherwise we can run into problems
831 # if documents have the same name
832 my $timestamp = time;
833 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
834 $tmp_dirname = $time_tmp_dirname;
835 my $i = 1;
836 while (-e $tmp_dirname) {
837 $tmp_dirname = "$time_tmp_dirname$i";
838 $i++;
839 }
840 &util::mk_dir($tmp_dirname);
841
842 return $tmp_dirname;
843}
844
845sub get_timestamped_tmp_filename_in_collection
846{
847
848 my ($input_filename, $output_ext) = @_;
849 # derive tmp filename from input filename
850 my ($tailname, $dirname, $suffix)
851 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
852
853 # softlink to collection tmp dir
854 my $tmp_dirname = &util::get_timestamped_tmp_folder();
855 $tmp_dirname = $dirname unless defined $tmp_dirname;
856
857 # following two steps copied from ConvertBinaryFile
858 # do we need them?? can't use them as is, as they use plugin methods.
859
860 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
861
862 # URLEncode this since htmls with images where the html filename is utf8 don't seem
863 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
864 # files on the filesystem.
865 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
866 if (defined $output_ext) {
867 $output_ext = ".$output_ext"; # add the dot
868 } else {
869 $output_ext = $suffix;
870 }
871 $output_ext= lc($output_ext);
872 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
873
874 return $tmp_filename;
875}
876
877sub get_toplevel_tmp_dir
878{
879 return filename_cat($ENV{'GSDLHOME'}, "tmp");
880}
881
882
883sub filename_to_regex {
884 my $filename = shift (@_);
885
886 # need to make single backslashes double so that regex works
887 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
888
889 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
890 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
891 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
892 $filename =~ s@\(@\\(@g; # escape brackets
893 $filename =~ s@\)@\\)@g; # escape brackets
894 $filename =~ s@\[@\\[@g; # escape brackets
895 $filename =~ s@\]@\\]@g; # escape brackets
896
897 return $filename;
898}
899
900sub unregex_filename {
901 my $filename = shift (@_);
902
903 # need to put doubled backslashes for regex back to single
904 $filename =~ s/\\\./\./g; # remove RE syntax for .
905 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
906 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
907 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
908 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
909
910 # \\ goes to \
911 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
912 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
913 return $filename;
914}
915
916sub filename_cat {
917 my $first_file = shift(@_);
918 my (@filenames) = @_;
919
920# Useful for debugging
921# -- might make sense to call caller(0) rather than (1)??
922# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
923# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
924
925 # If first_file is not null or empty, then add it back into the list
926 if (defined $first_file && $first_file =~ /\S/) {
927 unshift(@filenames, $first_file);
928 }
929
930 my $filename = join("/", @filenames);
931
932 # remove duplicate slashes and remove the last slash
933 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
934 $filename =~ s/[\\\/]+/\\/g;
935 } else {
936 $filename =~ s/[\/]+/\//g;
937 # DB: want a filename abc\de.html to remain like this
938 }
939 $filename =~ s/[\\\/]$//;
940
941 return $filename;
942}
943
944
945sub pathname_cat {
946 my $first_path = shift(@_);
947 my (@pathnames) = @_;
948
949 # If first_path is not null or empty, then add it back into the list
950 if (defined $first_path && $first_path =~ /\S/) {
951 unshift(@pathnames, $first_path);
952 }
953
954 my $join_char;
955 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
956 $join_char = ";";
957 } else {
958 $join_char = ":";
959 }
960
961 my $pathname = join($join_char, @pathnames);
962
963 # remove duplicate slashes
964 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
965 $pathname =~ s/[\\\/]+/\\/g;
966 } else {
967 $pathname =~ s/[\/]+/\//g;
968 # DB: want a pathname abc\de.html to remain like this
969 }
970
971 return $pathname;
972}
973
974
975sub tidy_up_oid {
976 my ($OID) = @_;
977 if ($OID =~ /\./) {
978 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
979 $OID =~ s/\.//g; #remove any periods
980 }
981 if ($OID =~ /^\s.*\s$/) {
982 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
983 # remove starting and trailing whitespace
984 $OID =~ s/^\s+//;
985 $OID =~ s/\s+$//;
986 }
987 if ($OID =~ /^[\d]*$/) {
988 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
989 $OID = "D" . $OID;
990 }
991
992 return $OID;
993}
994sub envvar_prepend {
995 my ($var,$val) = @_;
996
997 # do not prepend any value/path that's already in the environment variable
998
999 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1000 if (!defined($ENV{$var})) {
1001 $ENV{$var} = "$val";
1002 }
1003 elsif($ENV{$var} !~ m/$escaped_val/) {
1004 $ENV{$var} = "$val;".$ENV{$var};
1005 }
1006}
1007
1008sub envvar_append {
1009 my ($var,$val) = @_;
1010
1011 # do not append any value/path that's already in the environment variable
1012
1013 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1014 if (!defined($ENV{$var})) {
1015 $ENV{$var} = "$val";
1016 }
1017 elsif($ENV{$var} !~ m/$escaped_val/) {
1018 $ENV{$var} .= ";$val";
1019 }
1020}
1021
1022
1023# splits a filename into a prefix and a tail extension using the tail_re, or
1024# if that fails, splits on the file_extension . (dot)
1025sub get_prefix_and_tail_by_regex {
1026
1027 my ($filename,$tail_re) = @_;
1028
1029 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
1030 if ((!defined $file_prefix) || (!defined $file_ext)) {
1031 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
1032 }
1033
1034 return ($file_prefix,$file_ext);
1035}
1036
1037# get full path and file only path from a base_dir (which may be empty) and
1038# file (which may contain directories)
1039sub get_full_filenames {
1040 my ($base_dir, $file) = @_;
1041
1042 my $filename_full_path = $file;
1043 # add on directory if present
1044 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
1045
1046 my $filename_no_path = $file;
1047
1048 # remove directory if present
1049 $filename_no_path =~ s/^.*[\/\\]//;
1050 return ($filename_full_path, $filename_no_path);
1051}
1052
1053# returns the path of a file without the filename -- ie. the directory the file is in
1054sub filename_head {
1055 my $filename = shift(@_);
1056
1057 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1058 $filename =~ s/[^\\\\]*$//;
1059 }
1060 else {
1061 $filename =~ s/[^\\\/]*$//;
1062 }
1063
1064 return $filename;
1065}
1066
1067
1068
1069# returns 1 if filename1 and filename2 point to the same
1070# file or directory
1071sub filenames_equal {
1072 my ($filename1, $filename2) = @_;
1073
1074 # use filename_cat to clean up trailing slashes and
1075 # multiple slashes
1076 $filename1 = filename_cat ($filename1);
1077 $filename2 = filename_cat ($filename2);
1078
1079 # filenames not case sensitive on windows
1080 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1081 $filename1 =~ tr/[A-Z]/[a-z]/;
1082 $filename2 =~ tr/[A-Z]/[a-z]/;
1083 }
1084 return 1 if $filename1 eq $filename2;
1085 return 0;
1086}
1087
1088# If filename is relative to within_dir, returns the relative path of filename to that directory
1089# with slashes in the filename returned as they were in the original (absolute) filename.
1090sub filename_within_directory
1091{
1092 my ($filename,$within_dir) = @_;
1093
1094 if ($within_dir !~ m/[\/\\]$/) {
1095 my $dirsep = &util::get_dirsep();
1096 $within_dir .= $dirsep;
1097 }
1098
1099 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
1100 if ($filename =~ m/^$within_dir(.*)$/) {
1101 $filename = $1;
1102 }
1103
1104 return $filename;
1105}
1106
1107# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
1108# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
1109# The subpath returned will also be a URL type filename.
1110sub filename_within_directory_url_format
1111{
1112 my ($filename,$within_dir) = @_;
1113
1114 # convert parameters only to / slashes if Windows
1115
1116 my $filename_urlformat = &filepath_to_url_format($filename);
1117 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
1118
1119 #if ($within_dir_urlformat !~ m/\/$/) {
1120 # make sure directory ends with a slash
1121 #$within_dir_urlformat .= "/";
1122 #}
1123
1124 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
1125
1126 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
1127
1128 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
1129 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
1130 $filename_urlformat = $1;
1131 }
1132
1133 return $filename_urlformat;
1134}
1135
1136# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
1137# since on Linux it doesn't represent a file separator but an escape char).
1138sub filepath_to_url_format
1139{
1140 my ($filepath) = @_;
1141 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1142 # Only need to worry about Windows, as Unix style directories already in url-format
1143 # Convert Windows style \ => /
1144 $filepath =~ s@\\@/@g;
1145 }
1146 return $filepath;
1147}
1148
1149# regex filepaths on windows may include \\ as path separator. Convert \\ to /
1150sub filepath_regex_to_url_format
1151{
1152 my ($filepath) = @_;
1153 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1154 # Only need to worry about Windows, as Unix style directories already in url-format
1155 # Convert Windows style \\ => /
1156 $filepath =~ s@\\\\@/@g;
1157 }
1158 return $filepath;
1159
1160}
1161
1162# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
1163# and ignores trailing /
1164# returns (file, dirs) dirs will be empty if no subdirs
1165sub url_fileparse
1166{
1167 my ($filepath) = @_;
1168 # remove trailing /
1169 $filepath =~ s@/$@@;
1170 if ($filepath !~ m@/@) {
1171 return ($filepath, "");
1172 }
1173 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
1174 return ($file, $dirs);
1175
1176}
1177
1178
1179sub filename_within_collection
1180{
1181 my ($filename) = @_;
1182
1183 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1184
1185 if (defined $collect_dir) {
1186
1187 # if from within GSDLCOLLECTDIR, then remove directory prefix
1188 # so source_filename is realative to it. This is done to aid
1189 # portability, i.e. the collection can be moved to somewhere
1190 # else on the file system and the archives directory will still
1191 # work. This is needed, for example in the applet version of
1192 # GLI where GSDLHOME/collect on the server will be different to
1193 # the collect directory of the remove user. Of course,
1194 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1195 # it back into a full pathname.
1196
1197 $filename = filename_within_directory($filename,$collect_dir);
1198 }
1199
1200 return $filename;
1201}
1202
1203sub prettyprint_file
1204{
1205 my ($base_dir,$file,$gli) = @_;
1206
1207 my $filename_full_path = &util::filename_cat($base_dir,$file);
1208
1209 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1210 require Win32;
1211
1212 # For some reason base_dir in the form c:/a/b/c
1213 # This leads to confusion later on, so turn it back into
1214 # the more usual Windows form
1215 $base_dir =~ s/\//\\/g;
1216 my $long_base_dir = Win32::GetLongPathName($base_dir);
1217 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1218
1219 $file = filename_within_directory($long_full_path,$long_base_dir);
1220 $file = encode("utf8",$file) if ($gli);
1221 }
1222
1223 return $file;
1224}
1225
1226
1227sub upgrade_if_dos_filename
1228{
1229 my ($filename_full_path,$and_encode) = @_;
1230
1231 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1232 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1233 # to its long (Windows) version
1234 my $long_filename = Win32::GetLongPathName($filename_full_path);
1235 if (defined $long_filename) {
1236 $filename_full_path = $long_filename;
1237 }
1238 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1239 $filename_full_path =~ s/^(.):/\u$1:/;
1240 if ((defined $and_encode) && ($and_encode)) {
1241 $filename_full_path = encode("utf8",$filename_full_path);
1242 }
1243 }
1244
1245 return $filename_full_path;
1246}
1247
1248
1249sub downgrade_if_dos_filename
1250{
1251 my ($filename_full_path) = @_;
1252
1253 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1254 require Win32;
1255
1256 # Ensure the given long Windows filename is in a form that can
1257 # be opened by Perl => convert it to a short DOS-like filename
1258
1259 my $short_filename = Win32::GetShortPathName($filename_full_path);
1260 if (defined $short_filename) {
1261 $filename_full_path = $short_filename;
1262 }
1263 # Make sure initial drive letter is lower-case (to fit in
1264 # with rest of Greenstone)
1265 $filename_full_path =~ s/^(.):/\u$1:/;
1266 }
1267
1268 return $filename_full_path;
1269}
1270
1271sub block_filename
1272{
1273 my ($block_hash,$filename) = @_;
1274
1275 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1276
1277 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1278 my $lower_filename = lc($filename);
1279 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1280# my $lower_drive = $filename;
1281# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1282
1283# my $upper_drive = $filename;
1284# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1285#
1286# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1287# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
1288 }
1289 else {
1290 $block_hash->{'file_blocks'}->{$filename} = 1;
1291 }
1292}
1293
1294
1295sub filename_is_absolute
1296{
1297 my ($filename) = @_;
1298
1299 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1300 return ($filename =~ m/^(\w:)?\\/);
1301 }
1302 else {
1303 return ($filename =~ m/^\//);
1304 }
1305}
1306
1307
1308## @method make_absolute()
1309#
1310# Ensure the given file path is absolute in respect to the given base path.
1311#
1312# @param $base_dir A string denoting the base path the given dir must be
1313# absolute to.
1314# @param $dir The directory to be made absolute as a string. Note that the
1315# dir may already be absolute, in which case it will remain
1316# unchanged.
1317# @return The now absolute form of the directory as a string.
1318#
1319# @author John Thompson, DL Consulting Ltd.
1320# @copy 2006 DL Consulting Ltd.
1321#
1322#used in buildcol.pl, doesn't work for all cases --kjdon
1323sub make_absolute {
1324
1325 my ($base_dir, $dir) = @_;
1326### print STDERR "dir = $dir\n";
1327 $dir =~ s/[\\\/]+/\//g;
1328 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1329 $dir =~ s|^/tmp_mnt||;
1330 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1331 $dir =~ s|/[.][.]?/|/|g;
1332 $dir =~ tr|/|/|s;
1333### print STDERR "dir = $dir\n";
1334
1335 return $dir;
1336}
1337## make_absolute() ##
1338
1339sub get_dirsep {
1340
1341 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1342 return "\\";
1343 } else {
1344 return "\/";
1345 }
1346}
1347
1348sub get_os_dirsep {
1349
1350 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1351 return "\\\\";
1352 } else {
1353 return "\\\/";
1354 }
1355}
1356
1357sub get_re_dirsep {
1358
1359 return "\\\\|\\\/";
1360}
1361
1362
1363sub get_dirsep_tail {
1364 my ($filename) = @_;
1365
1366 # returns last part of directory or filename
1367 # On unix e.g. a/b.d => b.d
1368 # a/b/c => c
1369
1370 my $dirsep = get_re_dirsep();
1371 my @dirs = split (/$dirsep/, $filename);
1372 my $tail = pop @dirs;
1373
1374 # - caused problems under windows
1375 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1376
1377 return $tail;
1378}
1379
1380
1381# if this is running on windows we want binaries to end in
1382# .exe, otherwise they don't have to end in any extension
1383sub get_os_exe {
1384 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1385 return "";
1386}
1387
1388
1389# test to see whether this is a big or little endian machine
1390sub is_little_endian
1391{
1392 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1393 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1394 # Otherwise, it's little endian
1395
1396 #return 0 if $^O =~ /^darwin$/i;
1397 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1398
1399 # Going back to stating exactly whether the machine is little endian
1400 # or big endian, without any special case for Macs. Since for rata it comes
1401 # back with little endian and for shuttle with bigendian.
1402 return (ord(substr(pack("s",1), 0, 1)) == 1);
1403}
1404
1405
1406# will return the collection name if successful, "" otherwise
1407sub use_collection {
1408 my ($collection, $collectdir) = @_;
1409
1410 if (!defined $collectdir || $collectdir eq "") {
1411 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1412 }
1413
1414 # get and check the collection
1415 if (!defined($collection) || $collection eq "") {
1416 if (defined $ENV{'GSDLCOLLECTION'}) {
1417 $collection = $ENV{'GSDLCOLLECTION'};
1418 } else {
1419 print STDOUT "No collection specified\n";
1420 return "";
1421 }
1422 }
1423
1424 if ($collection eq "modelcol") {
1425 print STDOUT "You can't use modelcol.\n";
1426 return "";
1427 }
1428
1429 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1430 # are defined
1431 $ENV{'GSDLCOLLECTION'} = $collection;
1432 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1433
1434 # make sure this collection exists
1435 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1436 print STDOUT "Invalid collection ($collection).\n";
1437 return "";
1438 }
1439
1440 # everything is ready to go
1441 return $collection;
1442}
1443
1444sub get_current_collection_name {
1445 return $ENV{'GSDLCOLLECTION'};
1446}
1447
1448
1449# will return the collection name if successful, "" otherwise.
1450# Like use_collection (above) but for greenstone 3 (taking account of site level)
1451
1452sub use_site_collection {
1453 my ($site, $collection, $collectdir) = @_;
1454
1455 if (!defined $collectdir || $collectdir eq "") {
1456 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1457 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1458 }
1459
1460 # collectdir explicitly set by this point (using $site variable if required).
1461 # Can call "old" gsdl2 use_collection now.
1462
1463 return use_collection($collection,$collectdir);
1464}
1465
1466
1467
1468sub locate_config_file
1469{
1470 my ($file) = @_;
1471
1472 my $locations = locate_config_files($file);
1473
1474 return shift @$locations; # returns undef if 'locations' is empty
1475}
1476
1477
1478sub locate_config_files
1479{
1480 my ($file) = @_;
1481
1482 my @locations = ();
1483
1484 if (-e $file) {
1485 # Clearly specified (most likely full filename)
1486 # No need to hunt in 'etc' directories, return value unchanged
1487 push(@locations,$file);
1488 }
1489 else {
1490 # Check for collection specific one before looking in global GSDL 'etc'
1491 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1492 my $test_collect_etc_filename
1493 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1494
1495 if (-e $test_collect_etc_filename) {
1496 push(@locations,$test_collect_etc_filename);
1497 }
1498 }
1499 my $test_main_etc_filename
1500 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1501 if (-e $test_main_etc_filename) {
1502 push(@locations,$test_main_etc_filename);
1503 }
1504 }
1505
1506 return \@locations;
1507}
1508
1509
1510sub hyperlink_text
1511{
1512 my ($text) = @_;
1513
1514 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1515 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1516
1517 return $text;
1518}
1519
1520
1521# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1522# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1523sub is_dir_empty
1524{
1525 my ($path) = @_;
1526 opendir DIR, $path;
1527 while(my $entry = readdir DIR) {
1528 next if($entry =~ /^\.\.?$/);
1529 closedir DIR;
1530 return 0;
1531 }
1532 closedir DIR;
1533 return 1;
1534}
1535
1536# Returns the given filename converted using either URL encoding or base64
1537# encoding, as specified by $rename_method. If the given filename has no suffix
1538# (if it is just the tailname), then $no_suffix should be some defined value.
1539# rename_method can be url, none, base64
1540sub rename_file {
1541 my ($filename, $rename_method, $no_suffix) = @_;
1542
1543 if(!$filename) { # undefined or empty string
1544 return $filename;
1545 }
1546
1547 if (!$rename_method) {
1548 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1549 # Debugging information
1550 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1551 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1552 $rename_method = "url";
1553 } elsif($rename_method eq "none") {
1554 return $filename; # would have already been renamed
1555 }
1556
1557 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1558 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1559 ###$filename =~ s/ /_/g;
1560
1561 my ($tailname,$dirname,$suffix);
1562 if($no_suffix) { # given a tailname, no suffix
1563 ($tailname,$dirname) = File::Basename::fileparse($filename);
1564 }
1565 else {
1566 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1567 }
1568 if (!$suffix) {
1569 $suffix = "";
1570 }
1571 else {
1572 $suffix = lc($suffix);
1573 }
1574
1575 if ($rename_method eq "url") {
1576 $tailname = &unicode::url_encode($tailname);
1577 }
1578 elsif ($rename_method eq "base64") {
1579 $tailname = &unicode::base64_encode($tailname);
1580 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1581 }
1582
1583 $filename = "$tailname$suffix";
1584 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1585
1586 return $filename;
1587}
1588
1589
1590# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1591sub rename_ldb_or_bdb_file {
1592 my ($filename_no_ext) = @_;
1593
1594 my $new_filename = "$filename_no_ext.gdb";
1595 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1596 # try ldb
1597 my $old_filename = "$filename_no_ext.ldb";
1598
1599 if (-f $old_filename) {
1600 print STDERR "Renaming $old_filename to $new_filename\n";
1601 rename ($old_filename, $new_filename)
1602 || print STDERR "Rename failed: $!\n";
1603 return;
1604 }
1605 # try bdb
1606 $old_filename = "$filename_no_ext.bdb";
1607 if (-f $old_filename) {
1608 print STDERR "Renaming $old_filename to $new_filename\n";
1609 rename ($old_filename, $new_filename)
1610 || print STDERR "Rename failed: $!\n";
1611 return;
1612 }
1613}
1614
1615sub os_dir() {
1616
1617 my $gsdlarch = "";
1618 if(defined $ENV{'GSDLARCH'}) {
1619 $gsdlarch = $ENV{'GSDLARCH'};
1620 }
1621 return $ENV{'GSDLOS'}.$gsdlarch;
1622}
1623
1624# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1625# By default, /greenstone3 for GS3 or /greenstone for GS2.
1626sub get_greenstone_url_prefix() {
1627 # if already set on a previous occasion, just return that
1628 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1629 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1630
1631 my ($configfile, $urlprefix, $defaultUrlprefix);
1632 my @propertynames = ();
1633
1634 if($ENV{'GSDL3SRCHOME'}) {
1635 $defaultUrlprefix = "/greenstone3";
1636 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1637 push(@propertynames, qw/path\s*\=/);
1638 } else {
1639 $defaultUrlprefix = "/greenstone";
1640 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
1641 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1642 }
1643
1644 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1645
1646 if(!$urlprefix) { # no values found for URL prefix, use default values
1647 $urlprefix = $defaultUrlprefix;
1648 } else {
1649 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1650 $urlprefix =~ s/^\///; # remove the starting slash
1651 my @dirs = split(/(\\|\/)/, $urlprefix);
1652 $urlprefix = shift(@dirs);
1653
1654 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1655 $urlprefix = "/$urlprefix";
1656 }
1657 }
1658
1659 # set for the future
1660 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1661# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1662 return $urlprefix;
1663}
1664
1665
1666# Given a config file (xml or java properties file) and a list/array of regular expressions
1667# that represent property names to match on, this function will return the value for the 1st
1668# matching property name. If the return value is undefined, no matching property was found.
1669sub extract_propvalue_from_file() {
1670 my ($configfile, $propertynames) = @_;
1671
1672 my $value;
1673 unless(open(FIN, "<$configfile")) {
1674 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1675 return $value; # not initialised
1676 }
1677
1678 # Read the entire file at once, as one single line, then close it
1679 my $filecontents;
1680 {
1681 local $/ = undef;
1682 $filecontents = <FIN>;
1683 }
1684 close(FIN);
1685
1686 foreach my $regex (@$propertynames) {
1687 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1688 if($value) {
1689 $value =~ s/^\"//; # remove any startquotes
1690 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1691 last; # found value for a matching property, break from loop
1692 }
1693 }
1694
1695 return $value;
1696}
1697
1698# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1699# given that perllib is in @INC in order to invoke this subroutine.
1700# Call as follows -- after setting up INC to include perllib and
1701# after setting up GSDLHOME and GSDLOS:
1702#
1703# require util;
1704# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1705#
1706sub setup_greenstone_env() {
1707 my ($GSDLHOME, $GSDLOS) = @_;
1708
1709 #my %env_map = ();
1710 # Get the localised ENV settings of running a localised source setup.bash
1711 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1712 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1713 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
1714 if($GSDLOS =~ m/windows/i) {
1715 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1716 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1717 }
1718 if (!open(PIN, "$perl_command |")) {
1719 print STDERR ("Unable to execute command: $perl_command. $!\n");
1720 }
1721
1722 while (defined (my $perl_output_line = <PIN>)) {
1723 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1724 #$env_map{$key}=$value;
1725 $ENV{$key}=$value;
1726 }
1727 close (PIN);
1728
1729 # If any keys in $ENV don't occur in Greenstone's localised env
1730 # (stored in $env_map), delete those entries from $ENV
1731 #foreach $key (keys %ENV) {
1732 # if(!defined $env_map{$key}) {
1733 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1734 # delete $ENV{$key}; # del $ENV(key, value) pair
1735 # }
1736 #}
1737 #undef %env_map;
1738}
1739
1740sub get_perl_exec() {
1741 my $perl_exec = $^X; # may return just "perl"
1742
1743 if($ENV{'PERLPATH'}) {
1744 # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1745 if($ENV{'GSDLOS'} =~ m/windows/) {
1746 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1747 } else {
1748 $perl_exec = "$ENV{'PERLPATH'}/perl";
1749 }
1750 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1751 # containing the full path to the current perl executable we're using
1752 $perl_exec = $Config{perlpath}; # configured path for perl
1753 if (!-e $perl_exec) { # may not point to location on this machine
1754 $perl_exec = $^X; # may return just "perl"
1755 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1756 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1757 }
1758 }
1759 }
1760
1761 return $perl_exec;
1762}
1763
1764# returns the path to the java command in the JRE included with GS (if any),
1765# quoted to safeguard any spaces in this path, otherwise a simple java
1766# command is returned which assumes and will try for a system java.
1767sub get_java_command {
1768 my $java = "java";
1769 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1770 # after running setup.bat or from GLI which also runs setup.bat
1771 my $java_bin = &util::filename_cat($ENV{'GSDLHOME'},"packages","jre","bin");
1772 if(-d $java_bin) {
1773 $java = &util::filename_cat($java_bin,"java");
1774 $java = "\"".$java."\""; # quoted to preserve spaces in path
1775 }
1776 }
1777 return $java;
1778}
1779
1780
17811;
Note: See TracBrowser for help on using the repository browser.