source: main/trunk/greenstone2/perllib/util.pm@ 25578

Last change on this file since 25578 was 25578, checked in by ak19, 12 years ago

Modifying recently introduced mv_dir_contents method again: recursion call to mv_dir_contents on any directory need only happen if the same directory (with the same sub path) also exists in the target directory. For regular files and any directories not duplicated in target, a straightforward call to mv is sufficient. Reducing the amount of recursion would make it more efficient.

  • Property svn:keywords set to Author Date Id Revision
File size: 51.8 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41 my (@files) = @_;
42
43 my @filefiles = ();
44
45 # make sure the files we want to delete exist
46 # and are regular files
47 foreach my $file (@files) {
48 if (!-e $file) {
49 print STDERR "util::rm $file does not exist\n";
50 } elsif ((!-f $file) && (!-l $file)) {
51 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52 } else {
53 push (@filefiles, $file);
54 }
55 }
56
57 # remove the files
58 my $numremoved = unlink @filefiles;
59
60 # check to make sure all of them were removed
61 if ($numremoved != scalar(@filefiles)) {
62 print STDERR "util::rm Not all files were removed\n";
63 }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine when debugging the deletion of individual files.
68sub rm_debug {
69 my (@files) = @_;
70 my @filefiles = ();
71
72 # make sure the files we want to delete exist
73 # and are regular files
74 foreach my $file (@files) {
75 if (!-e $file) {
76 print STDERR "util::rm $file does not exist\n";
77 } elsif ((!-f $file) && (!-l $file)) {
78 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79 } else { # debug message
80 unlink($file) or warn "Could not delete file $file: $!\n";
81 }
82 }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88 my ($files,$file_accept_re,$file_reject_re) = @_;
89
90# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92# print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93
94 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96 # recursively remove the files
97 foreach my $file (@files_array) {
98 $file =~ s/[\/\\]+$//; # remove trailing slashes
99
100 if (!-e $file) {
101 print STDERR "util::filtered_rm_r $file does not exist\n";
102
103 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104 # get the contents of this directory
105 if (!opendir (INDIR, $file)) {
106 print STDERR "util::filtered_rm_r could not open directory $file\n";
107 } else {
108 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109 closedir (INDIR);
110
111 # remove all the files in this directory
112 map {$_="$file/$_";} @filedir;
113 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115 if (!defined $file_accept_re && !defined $file_reject_re) {
116 # remove this directory
117 if (!rmdir $file) {
118 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119 }
120 }
121 }
122 } else {
123 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126 # remove this file
127 &rm ($file);
128 }
129 }
130 }
131}
132
133
134# recursive removal
135sub rm_r {
136 my (@files) = @_;
137
138 # use the more general (but reterospectively written function
139 # filtered_rm_r function()
140
141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149 my $dest = pop (@_);
150 my (@srcfiles) = @_;
151
152 # remove trailing slashes from source and destination files
153 $dest =~ s/[\\\/]+$//;
154 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156 # a few sanity checks
157 if (scalar (@srcfiles) == 0) {
158 print STDERR "util::mv no destination directory given\n";
159 return;
160 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161 print STDERR "util::mv if multiple source files are given the ".
162 "destination must be a directory\n";
163 return;
164 }
165
166 # move the files
167 foreach my $file (@srcfiles) {
168 my $tempdest = $dest;
169 if (-d $tempdest) {
170 my ($filename) = $file =~ /([^\\\/]+)$/;
171 $tempdest .= "/$filename";
172 }
173 if (!-e $file) {
174 print STDERR "util::mv $file does not exist\n";
175 } else {
176 rename ($file, $tempdest);
177 }
178 }
179}
180
181# Move the contents of source directory into target directory
182# (as opposed to merely replacing target dir with the src dir)
183# This can overwrite any files with duplicate names in the target
184# but other files and folders in the target will continue to exist
185sub mv_dir_contents {
186 my ($src_dir, $dest_dir) = @_;
187
188 # Obtain listing of all files within src_dir
189 # Note that readdir lists relative paths, as well as . and ..
190 opendir(DIR, "$src_dir");
191 my @files= readdir(DIR);
192 close(DIR);
193
194 my @full_path_files = ();
195 foreach my $file (@files) {
196 # process all except . and ..
197 unless($file eq "." || $file eq "..") {
198
199 my $dest_subdir = &filename_cat($dest_dir, $file); # $file still relative path
200
201 # construct absolute paths
202 $file = &filename_cat($src_dir, $file); # $file is now an absolute path
203
204 # Recurse on directories which have an equivalent in target dest_dir
205 # If $file is a directory that already exists in target $dest_dir,
206 # then a simple move operation will fail (definitely on Windows).
207 if(-d $file && -d $dest_subdir) {
208 print STDERR "**** $file is a directory also existing in target, to be copied to $dest_subdir\n";
209 &mv_dir_contents($file, $dest_subdir);
210
211 # now all content is moved across, delete empty dir in source folder
212 if(&is_dir_empty($file)) {
213 if (!rmdir $file) {
214 print STDERR "util::mv_dir_contents couldn't remove directory $file\n";
215 }
216 } else { # error
217 print STDERR "ERROR. util::mv_dir_contents: subfolder $file still non-empty after moving contents to $dest_dir\n";
218 }
219 } else { # process files and any directories that don't already exist with a simple move
220 push(@full_path_files, $file);
221 }
222 }
223 }
224
225 if(!&dir_exists($dest_dir)) { # create target toplevel folder or subfolders if they don't exist
226 &mk_dir($dest_dir);
227 }
228
229 #print STDERR "@@@@@ Copying files to: $dest_dir\n";
230 if(@full_path_files) { # if non-empty, there's something to copy across
231 &mv(@full_path_files, $dest_dir);
232 }
233}
234
235
236# copies a file or a group of files
237sub cp {
238 my $dest = pop (@_);
239 my (@srcfiles) = @_;
240
241 # remove trailing slashes from source and destination files
242 $dest =~ s/[\\\/]+$//;
243 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
244
245 # a few sanity checks
246 if (scalar (@srcfiles) == 0) {
247 print STDERR "util::cp no destination directory given\n";
248 return;
249 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
250 print STDERR "util::cp if multiple source files are given the ".
251 "destination must be a directory\n";
252 return;
253 }
254
255 # copy the files
256 foreach my $file (@srcfiles) {
257 my $tempdest = $dest;
258 if (-d $tempdest) {
259 my ($filename) = $file =~ /([^\\\/]+)$/;
260 $tempdest .= "/$filename";
261 }
262 if (!-e $file) {
263 print STDERR "util::cp $file does not exist\n";
264 } elsif (!-f $file) {
265 print STDERR "util::cp $file is not a plain file\n";
266 } else {
267 &File::Copy::copy ($file, $tempdest);
268 }
269 }
270}
271
272
273
274# recursively copies a file or group of files
275# syntax: cp_r (sourcefiles, destination directory)
276# destination must be a directory - to copy one file to
277# another use cp instead
278sub cp_r {
279 my $dest = pop (@_);
280 my (@srcfiles) = @_;
281
282 # a few sanity checks
283 if (scalar (@srcfiles) == 0) {
284 print STDERR "util::cp_r no destination directory given\n";
285 return;
286 } elsif (-f $dest) {
287 print STDERR "util::cp_r destination must be a directory\n";
288 return;
289 }
290
291 # create destination directory if it doesn't exist already
292 if (! -d $dest) {
293 my $store_umask = umask(0002);
294 mkdir ($dest, 0777);
295 umask($store_umask);
296 }
297
298 # copy the files
299 foreach my $file (@srcfiles) {
300
301 if (!-e $file) {
302 print STDERR "util::cp_r $file does not exist\n";
303
304 } elsif (-d $file) {
305 # make the new directory
306 my ($filename) = $file =~ /([^\\\/]*)$/;
307 $dest = &util::filename_cat ($dest, $filename);
308 my $store_umask = umask(0002);
309 mkdir ($dest, 0777);
310 umask($store_umask);
311
312 # get the contents of this directory
313 if (!opendir (INDIR, $file)) {
314 print STDERR "util::cp_r could not open directory $file\n";
315 } else {
316 my @filedir = readdir (INDIR);
317 closedir (INDIR);
318 foreach my $f (@filedir) {
319 next if $f =~ /^\.\.?$/;
320 # copy all the files in this directory
321 my $ff = &util::filename_cat ($file, $f);
322 &cp_r ($ff, $dest);
323 }
324 }
325
326 } else {
327 &cp($file, $dest);
328 }
329 }
330}
331# recursively copies a file or group of files
332# syntax: cp_r (sourcefiles, destination directory)
333# destination must be a directory - to copy one file to
334# another use cp instead
335sub cp_r_nosvn {
336 my $dest = pop (@_);
337 my (@srcfiles) = @_;
338
339 # a few sanity checks
340 if (scalar (@srcfiles) == 0) {
341 print STDERR "util::cp_r no destination directory given\n";
342 return;
343 } elsif (-f $dest) {
344 print STDERR "util::cp_r destination must be a directory\n";
345 return;
346 }
347
348 # create destination directory if it doesn't exist already
349 if (! -d $dest) {
350 my $store_umask = umask(0002);
351 mkdir ($dest, 0777);
352 umask($store_umask);
353 }
354
355 # copy the files
356 foreach my $file (@srcfiles) {
357
358 if (!-e $file) {
359 print STDERR "util::cp_r $file does not exist\n";
360
361 } elsif (-d $file) {
362 # make the new directory
363 my ($filename) = $file =~ /([^\\\/]*)$/;
364 $dest = &util::filename_cat ($dest, $filename);
365 my $store_umask = umask(0002);
366 mkdir ($dest, 0777);
367 umask($store_umask);
368
369 # get the contents of this directory
370 if (!opendir (INDIR, $file)) {
371 print STDERR "util::cp_r could not open directory $file\n";
372 } else {
373 my @filedir = readdir (INDIR);
374 closedir (INDIR);
375 foreach my $f (@filedir) {
376 next if $f =~ /^\.\.?$/;
377 next if $f =~ /^\.svn$/;
378 # copy all the files in this directory
379 my $ff = &util::filename_cat ($file, $f);
380 &cp_r ($ff, $dest);
381 }
382 }
383
384 } else {
385 &cp($file, $dest);
386 }
387 }
388}
389
390# copies a directory and its contents, excluding subdirectories, into a new directory
391sub cp_r_toplevel {
392 my $dest = pop (@_);
393 my (@srcfiles) = @_;
394
395 # a few sanity checks
396 if (scalar (@srcfiles) == 0) {
397 print STDERR "util::cp_r no destination directory given\n";
398 return;
399 } elsif (-f $dest) {
400 print STDERR "util::cp_r destination must be a directory\n";
401 return;
402 }
403
404 # create destination directory if it doesn't exist already
405 if (! -d $dest) {
406 my $store_umask = umask(0002);
407 mkdir ($dest, 0777);
408 umask($store_umask);
409 }
410
411 # copy the files
412 foreach my $file (@srcfiles) {
413
414 if (!-e $file) {
415 print STDERR "util::cp_r $file does not exist\n";
416
417 } elsif (-d $file) {
418 # make the new directory
419 my ($filename) = $file =~ /([^\\\/]*)$/;
420 $dest = &util::filename_cat ($dest, $filename);
421 my $store_umask = umask(0002);
422 mkdir ($dest, 0777);
423 umask($store_umask);
424
425 # get the contents of this directory
426 if (!opendir (INDIR, $file)) {
427 print STDERR "util::cp_r could not open directory $file\n";
428 } else {
429 my @filedir = readdir (INDIR);
430 closedir (INDIR);
431 foreach my $f (@filedir) {
432 next if $f =~ /^\.\.?$/;
433
434 # copy all the files in this directory, but not directories
435 my $ff = &util::filename_cat ($file, $f);
436 if (-f $ff) {
437 &cp($ff, $dest);
438 #&cp_r ($ff, $dest);
439 }
440 }
441 }
442
443 } else {
444 &cp($file, $dest);
445 }
446 }
447}
448
449sub mk_dir {
450 my ($dir) = @_;
451
452 my $store_umask = umask(0002);
453 my $mkdir_ok = mkdir ($dir, 0777);
454 umask($store_umask);
455
456 if (!$mkdir_ok)
457 {
458 print STDERR "util::mk_dir could not create directory $dir\n";
459 return;
460 }
461}
462
463# in case anyone cares - I did some testing (using perls Benchmark module)
464# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
465# slightly faster (surprisingly) - Stefan.
466sub mk_all_dir {
467 my ($dir) = @_;
468
469 # use / for the directory separator, remove duplicate and
470 # trailing slashes
471 $dir=~s/[\\\/]+/\//g;
472 $dir=~s/[\\\/]+$//;
473
474 # make sure the cache directory exists
475 my $dirsofar = "";
476 my $first = 1;
477 foreach my $dirname (split ("/", $dir)) {
478 $dirsofar .= "/" unless $first;
479 $first = 0;
480
481 $dirsofar .= $dirname;
482
483 next if $dirname =~ /^(|[a-z]:)$/i;
484 if (!-e $dirsofar)
485 {
486 my $store_umask = umask(0002);
487 my $mkdir_ok = mkdir ($dirsofar, 0777);
488 umask($store_umask);
489 if (!$mkdir_ok)
490 {
491 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
492 return;
493 }
494 }
495 }
496}
497
498# make hard link to file if supported by OS, otherwise copy the file
499sub hard_link {
500 my ($src, $dest, $verbosity) = @_;
501
502 # remove trailing slashes from source and destination files
503 $src =~ s/[\\\/]+$//;
504 $dest =~ s/[\\\/]+$//;
505
506## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
507 # a few sanity checks
508 if (-e $dest) {
509 # destination file already exists
510 return;
511 }
512 elsif (!-e $src) {
513 print STDERR "util::hard_link source file \"$src\" does not exist\n";
514 return 1;
515 }
516 elsif (-d $src) {
517 print STDERR "util::hard_link source \"$src\" is a directory\n";
518 return 1;
519 }
520
521 my $dest_dir = &File::Basename::dirname($dest);
522 mk_all_dir($dest_dir) if (!-e $dest_dir);
523
524
525 if (!link($src, $dest)) {
526 if ((!defined $verbosity) || ($verbosity>2)) {
527 print STDERR "util::hard_link: unable to create hard link. ";
528 print STDERR " Copying file: $src -> $dest\n";
529 }
530 &File::Copy::copy ($src, $dest);
531 }
532 return 0;
533}
534
535# make soft link to file if supported by OS, otherwise copy file
536sub soft_link {
537 my ($src, $dest, $ensure_paths_absolute) = @_;
538
539 # remove trailing slashes from source and destination files
540 $src =~ s/[\\\/]+$//;
541 $dest =~ s/[\\\/]+$//;
542
543 # Ensure file paths are absolute IF requested to do so
544 # Soft_linking didn't work for relative paths
545 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
546 # We need to ensure that the src file is the absolute path
547 # See http://perldoc.perl.org/File/Spec.html
548 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
549 $src = File::Spec->rel2abs($src); # make absolute
550 }
551 # Might as well ensure that the destination file's absolute path is used
552 if(!File::Spec->file_name_is_absolute( $dest )) {
553 $dest = File::Spec->rel2abs($dest); # make absolute
554 }
555 }
556
557 # a few sanity checks
558 if (!-e $src) {
559 print STDERR "util::soft_link source file $src does not exist\n";
560 return 0;
561 }
562
563 my $dest_dir = &File::Basename::dirname($dest);
564 mk_all_dir($dest_dir) if (!-e $dest_dir);
565
566 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
567
568 # symlink not supported on windows
569 &File::Copy::copy ($src, $dest);
570
571 } elsif (!eval {symlink($src, $dest)}) {
572 print STDERR "util::soft_link: unable to create soft link.\n";
573 return 0;
574 }
575
576 return 1;
577}
578
579# Primarily for filenames generated by processing
580# content of HTML files (which are mapped to UTF-8 internally)
581#
582# To turn this into an octet string that really exists on the file
583# system:
584# 1. don't need to do anything special for Unix-based systems
585# (as underlying file system is byte-code)
586# 2. need to map to short DOS filenames for Windows
587
588sub utf8_to_real_filename
589{
590 my ($utf8_filename) = @_;
591
592 my $real_filename;
593
594 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
595 require Win32;
596
597 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
598
599 my $unicode_filename = decode("utf8",$utf8_filename);
600 $real_filename = Win32::GetShortPathName($unicode_filename);
601 }
602 else {
603 $real_filename = $utf8_filename;
604 }
605
606 return $real_filename;
607}
608
609
610sub fd_exists
611{
612 my $filename_full_path = shift @_;
613 my $test_op = shift @_ || "-e";
614
615 # By default tests for existance of file or directory (-e)
616 # Can be made more specific by providing second parameter (e.g. -f or -d)
617
618 my $exists = 0;
619
620 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
621 require Win32;
622 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
623 if (!defined $filename_short_path) {
624 # Was probably still in UTF8 form (not what is needed on Windows)
625 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
626 if (defined $unicode_filename_full_path) {
627 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
628 }
629 }
630 $filename_full_path = $filename_short_path;
631 }
632
633 if (defined $filename_full_path) {
634 $exists = eval "($test_op \$filename_full_path)";
635 }
636
637 return $exists;
638}
639
640sub file_exists
641{
642 my ($filename_full_path) = @_;
643
644 return fd_exists($filename_full_path,"-f");
645}
646
647sub dir_exists
648{
649 my ($filename_full_path) = @_;
650
651 return fd_exists($filename_full_path,"-d");
652}
653
654
655
656# updates a copy of a directory in some other part of the filesystem
657# verbosity settings are: 0=low, 1=normal, 2=high
658# both $fromdir and $todir should be absolute paths
659sub cachedir {
660 my ($fromdir, $todir, $verbosity) = @_;
661 $verbosity = 1 unless defined $verbosity;
662
663 # use / for the directory separator, remove duplicate and
664 # trailing slashes
665 $fromdir=~s/[\\\/]+/\//g;
666 $fromdir=~s/[\\\/]+$//;
667 $todir=~s/[\\\/]+/\//g;
668 $todir=~s/[\\\/]+$//;
669
670 &mk_all_dir ($todir);
671
672 # get the directories in ascending order
673 if (!opendir (FROMDIR, $fromdir)) {
674 print STDERR "util::cachedir could not read directory $fromdir\n";
675 return;
676 }
677 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
678 closedir (FROMDIR);
679
680 if (!opendir (TODIR, $todir)) {
681 print STDERR "util::cacedir could not read directory $todir\n";
682 return;
683 }
684 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
685 closedir (TODIR);
686
687 my $fromi = 0;
688 my $toi = 0;
689
690 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
691# print "fromi: $fromi toi: $toi\n";
692
693 # see if we should delete a file/directory
694 # this should happen if the file/directory
695 # is not in the from list or if its a different
696 # size, or has an older timestamp
697 if ($toi < scalar(@todir)) {
698 if (($fromi >= scalar(@fromdir)) ||
699 ($todir[$toi] lt $fromdir[$fromi] ||
700 ($todir[$toi] eq $fromdir[$fromi] &&
701 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
702 $verbosity)))) {
703
704 # the files are different
705 &rm_r("$todir/$todir[$toi]");
706 splice(@todir, $toi, 1); # $toi stays the same
707
708 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
709 # the files are the same
710 # if it is a directory, check its contents
711 if (-d "$todir/$todir[$toi]") {
712 &cachedir ("$fromdir/$fromdir[$fromi]",
713 "$todir/$todir[$toi]", $verbosity);
714 }
715
716 $toi++;
717 $fromi++;
718 next;
719 }
720 }
721
722 # see if we should insert a file/directory
723 # we should insert a file/directory if there
724 # is no tofiles left or if the tofile does not exist
725 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
726 $todir[$toi] gt $fromdir[$fromi])) {
727 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
728 splice (@todir, $toi, 0, $fromdir[$fromi]);
729
730 $toi++;
731 $fromi++;
732 }
733 }
734}
735
736# this function returns -1 if either file is not found
737# assumes that $file1 and $file2 are absolute file names or
738# in the current directory
739# $file2 is allowed to be newer than $file1
740sub differentfiles {
741 my ($file1, $file2, $verbosity) = @_;
742 $verbosity = 1 unless defined $verbosity;
743
744 $file1 =~ s/\/+$//;
745 $file2 =~ s/\/+$//;
746
747 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
748 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
749
750 return -1 unless (-e $file1 && -e $file2);
751 if ($file1name ne $file2name) {
752 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
753 return 1;
754 }
755
756 my @file1stat = stat ($file1);
757 my @file2stat = stat ($file2);
758
759 if (-d $file1) {
760 if (! -d $file2) {
761 print STDERR "one file is a directory\n" if ($verbosity >= 2);
762 return 1;
763 }
764 return 0;
765 }
766
767 # both must be regular files
768 unless (-f $file1 && -f $file2) {
769 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
770 return 1;
771 }
772
773 # the size of the files must be the same
774 if ($file1stat[7] != $file2stat[7]) {
775 print STDERR "different sized files\n" if ($verbosity >= 2);
776 return 1;
777 }
778
779 # the second file cannot be older than the first
780 if ($file1stat[9] > $file2stat[9]) {
781 print STDERR "file is older\n" if ($verbosity >= 2);
782 return 1;
783 }
784
785 return 0;
786}
787
788
789sub get_tmp_filename
790{
791 my $file_ext = shift(@_) || undef;
792
793 my $opt_dot_file_ext = "";
794 if (defined $file_ext) {
795 if ($file_ext !~ m/\./) {
796 # no dot, so needs one added in at start
797 $opt_dot_file_ext = ".$file_ext"
798 }
799 else {
800 # allow for "extensions" such as _metadata.txt to be handled
801 # gracefully
802 $opt_dot_file_ext = $file_ext;
803 }
804 }
805
806 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
807 &mk_all_dir ($tmpdir) unless -e $tmpdir;
808
809 my $count = 1000;
810 my $rand = int(rand $count);
811 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
812
813 while (-e $full_tmp_filename) {
814 $rand = int(rand $count);
815 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
816 $count++;
817 }
818
819 return $full_tmp_filename;
820}
821
822sub get_timestamped_tmp_folder
823{
824
825 my $tmp_dirname;
826 if(defined $ENV{'GSDLCOLLECTDIR'}) {
827 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
828 } elsif(defined $ENV{'GSDLHOME'}) {
829 $tmp_dirname = $ENV{'GSDLHOME'};
830 } else {
831 return undef;
832 }
833
834 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
835 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
836
837 # add the timestamp into the path otherwise we can run into problems
838 # if documents have the same name
839 my $timestamp = time;
840 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
841 $tmp_dirname = $time_tmp_dirname;
842 my $i = 1;
843 while (-e $tmp_dirname) {
844 $tmp_dirname = "$time_tmp_dirname$i";
845 $i++;
846 }
847 &util::mk_dir($tmp_dirname);
848
849 return $tmp_dirname;
850}
851
852sub get_timestamped_tmp_filename_in_collection
853{
854
855 my ($input_filename, $output_ext) = @_;
856 # derive tmp filename from input filename
857 my ($tailname, $dirname, $suffix)
858 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
859
860 # softlink to collection tmp dir
861 my $tmp_dirname = &util::get_timestamped_tmp_folder();
862 $tmp_dirname = $dirname unless defined $tmp_dirname;
863
864 # following two steps copied from ConvertBinaryFile
865 # do we need them?? can't use them as is, as they use plugin methods.
866
867 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
868
869 # URLEncode this since htmls with images where the html filename is utf8 don't seem
870 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
871 # files on the filesystem.
872 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
873 if (defined $output_ext) {
874 $output_ext = ".$output_ext"; # add the dot
875 } else {
876 $output_ext = $suffix;
877 }
878 $output_ext= lc($output_ext);
879 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
880
881 return $tmp_filename;
882}
883
884sub get_toplevel_tmp_dir
885{
886 return filename_cat($ENV{'GSDLHOME'}, "tmp");
887}
888
889
890sub filename_to_regex {
891 my $filename = shift (@_);
892
893 # need to make single backslashes double so that regex works
894 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
895
896 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
897 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
898 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
899 $filename =~ s@\(@\\(@g; # escape brackets
900 $filename =~ s@\)@\\)@g; # escape brackets
901 $filename =~ s@\[@\\[@g; # escape brackets
902 $filename =~ s@\]@\\]@g; # escape brackets
903
904 return $filename;
905}
906
907sub unregex_filename {
908 my $filename = shift (@_);
909
910 # need to put doubled backslashes for regex back to single
911 $filename =~ s/\\\./\./g; # remove RE syntax for .
912 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
913 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
914 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
915 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
916
917 # \\ goes to \
918 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
919 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
920 return $filename;
921}
922
923sub filename_cat {
924 my $first_file = shift(@_);
925 my (@filenames) = @_;
926
927# Useful for debugging
928# -- might make sense to call caller(0) rather than (1)??
929# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
930# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
931
932 # If first_file is not null or empty, then add it back into the list
933 if (defined $first_file && $first_file =~ /\S/) {
934 unshift(@filenames, $first_file);
935 }
936
937 my $filename = join("/", @filenames);
938
939 # remove duplicate slashes and remove the last slash
940 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
941 $filename =~ s/[\\\/]+/\\/g;
942 } else {
943 $filename =~ s/[\/]+/\//g;
944 # DB: want a filename abc\de.html to remain like this
945 }
946 $filename =~ s/[\\\/]$//;
947
948 return $filename;
949}
950
951
952sub pathname_cat {
953 my $first_path = shift(@_);
954 my (@pathnames) = @_;
955
956 # If first_path is not null or empty, then add it back into the list
957 if (defined $first_path && $first_path =~ /\S/) {
958 unshift(@pathnames, $first_path);
959 }
960
961 my $join_char;
962 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
963 $join_char = ";";
964 } else {
965 $join_char = ":";
966 }
967
968 my $pathname = join($join_char, @pathnames);
969
970 # remove duplicate slashes
971 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
972 $pathname =~ s/[\\\/]+/\\/g;
973 } else {
974 $pathname =~ s/[\/]+/\//g;
975 # DB: want a pathname abc\de.html to remain like this
976 }
977
978 return $pathname;
979}
980
981
982sub tidy_up_oid {
983 my ($OID) = @_;
984 if ($OID =~ /\./) {
985 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
986 $OID =~ s/\.//g; #remove any periods
987 }
988 if ($OID =~ /^\s.*\s$/) {
989 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
990 # remove starting and trailing whitespace
991 $OID =~ s/^\s+//;
992 $OID =~ s/\s+$//;
993 }
994 if ($OID =~ /^[\d]*$/) {
995 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
996 $OID = "D" . $OID;
997 }
998
999 return $OID;
1000}
1001sub envvar_prepend {
1002 my ($var,$val) = @_;
1003
1004 # do not prepend any value/path that's already in the environment variable
1005
1006 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1007 if (!defined($ENV{$var})) {
1008 $ENV{$var} = "$val";
1009 }
1010 elsif($ENV{$var} !~ m/$escaped_val/) {
1011 $ENV{$var} = "$val;".$ENV{$var};
1012 }
1013}
1014
1015sub envvar_append {
1016 my ($var,$val) = @_;
1017
1018 # do not append any value/path that's already in the environment variable
1019
1020 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1021 if (!defined($ENV{$var})) {
1022 $ENV{$var} = "$val";
1023 }
1024 elsif($ENV{$var} !~ m/$escaped_val/) {
1025 $ENV{$var} .= ";$val";
1026 }
1027}
1028
1029
1030# splits a filename into a prefix and a tail extension using the tail_re, or
1031# if that fails, splits on the file_extension . (dot)
1032sub get_prefix_and_tail_by_regex {
1033
1034 my ($filename,$tail_re) = @_;
1035
1036 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
1037 if ((!defined $file_prefix) || (!defined $file_ext)) {
1038 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
1039 }
1040
1041 return ($file_prefix,$file_ext);
1042}
1043
1044# get full path and file only path from a base_dir (which may be empty) and
1045# file (which may contain directories)
1046sub get_full_filenames {
1047 my ($base_dir, $file) = @_;
1048
1049 my $filename_full_path = $file;
1050 # add on directory if present
1051 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
1052
1053 my $filename_no_path = $file;
1054
1055 # remove directory if present
1056 $filename_no_path =~ s/^.*[\/\\]//;
1057 return ($filename_full_path, $filename_no_path);
1058}
1059
1060# returns the path of a file without the filename -- ie. the directory the file is in
1061sub filename_head {
1062 my $filename = shift(@_);
1063
1064 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1065 $filename =~ s/[^\\\\]*$//;
1066 }
1067 else {
1068 $filename =~ s/[^\\\/]*$//;
1069 }
1070
1071 return $filename;
1072}
1073
1074
1075
1076# returns 1 if filename1 and filename2 point to the same
1077# file or directory
1078sub filenames_equal {
1079 my ($filename1, $filename2) = @_;
1080
1081 # use filename_cat to clean up trailing slashes and
1082 # multiple slashes
1083 $filename1 = filename_cat ($filename1);
1084 $filename2 = filename_cat ($filename2);
1085
1086 # filenames not case sensitive on windows
1087 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1088 $filename1 =~ tr/[A-Z]/[a-z]/;
1089 $filename2 =~ tr/[A-Z]/[a-z]/;
1090 }
1091 return 1 if $filename1 eq $filename2;
1092 return 0;
1093}
1094
1095# If filename is relative to within_dir, returns the relative path of filename to that directory
1096# with slashes in the filename returned as they were in the original (absolute) filename.
1097sub filename_within_directory
1098{
1099 my ($filename,$within_dir) = @_;
1100
1101 if ($within_dir !~ m/[\/\\]$/) {
1102 my $dirsep = &util::get_dirsep();
1103 $within_dir .= $dirsep;
1104 }
1105
1106 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
1107 if ($filename =~ m/^$within_dir(.*)$/) {
1108 $filename = $1;
1109 }
1110
1111 return $filename;
1112}
1113
1114# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
1115# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
1116# The subpath returned will also be a URL type filename.
1117sub filename_within_directory_url_format
1118{
1119 my ($filename,$within_dir) = @_;
1120
1121 # convert parameters only to / slashes if Windows
1122
1123 my $filename_urlformat = &filepath_to_url_format($filename);
1124 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
1125
1126 #if ($within_dir_urlformat !~ m/\/$/) {
1127 # make sure directory ends with a slash
1128 #$within_dir_urlformat .= "/";
1129 #}
1130
1131 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
1132
1133 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
1134
1135 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
1136 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
1137 $filename_urlformat = $1;
1138 }
1139
1140 return $filename_urlformat;
1141}
1142
1143# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
1144# since on Linux it doesn't represent a file separator but an escape char).
1145sub filepath_to_url_format
1146{
1147 my ($filepath) = @_;
1148 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1149 # Only need to worry about Windows, as Unix style directories already in url-format
1150 # Convert Windows style \ => /
1151 $filepath =~ s@\\@/@g;
1152 }
1153 return $filepath;
1154}
1155
1156# regex filepaths on windows may include \\ as path separator. Convert \\ to /
1157sub filepath_regex_to_url_format
1158{
1159 my ($filepath) = @_;
1160 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1161 # Only need to worry about Windows, as Unix style directories already in url-format
1162 # Convert Windows style \\ => /
1163 $filepath =~ s@\\\\@/@g;
1164 }
1165 return $filepath;
1166
1167}
1168
1169# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
1170# and ignores trailing /
1171# returns (file, dirs) dirs will be empty if no subdirs
1172sub url_fileparse
1173{
1174 my ($filepath) = @_;
1175 # remove trailing /
1176 $filepath =~ s@/$@@;
1177 if ($filepath !~ m@/@) {
1178 return ($filepath, "");
1179 }
1180 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
1181 return ($file, $dirs);
1182
1183}
1184
1185
1186sub filename_within_collection
1187{
1188 my ($filename) = @_;
1189
1190 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1191
1192 if (defined $collect_dir) {
1193
1194 # if from within GSDLCOLLECTDIR, then remove directory prefix
1195 # so source_filename is realative to it. This is done to aid
1196 # portability, i.e. the collection can be moved to somewhere
1197 # else on the file system and the archives directory will still
1198 # work. This is needed, for example in the applet version of
1199 # GLI where GSDLHOME/collect on the server will be different to
1200 # the collect directory of the remove user. Of course,
1201 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1202 # it back into a full pathname.
1203
1204 $filename = filename_within_directory($filename,$collect_dir);
1205 }
1206
1207 return $filename;
1208}
1209
1210sub prettyprint_file
1211{
1212 my ($base_dir,$file,$gli) = @_;
1213
1214 my $filename_full_path = &util::filename_cat($base_dir,$file);
1215
1216 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1217 require Win32;
1218
1219 # For some reason base_dir in the form c:/a/b/c
1220 # This leads to confusion later on, so turn it back into
1221 # the more usual Windows form
1222 $base_dir =~ s/\//\\/g;
1223 my $long_base_dir = Win32::GetLongPathName($base_dir);
1224 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1225
1226 $file = filename_within_directory($long_full_path,$long_base_dir);
1227 $file = encode("utf8",$file) if ($gli);
1228 }
1229
1230 return $file;
1231}
1232
1233
1234sub upgrade_if_dos_filename
1235{
1236 my ($filename_full_path,$and_encode) = @_;
1237
1238 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1239 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1240 # to its long (Windows) version
1241 my $long_filename = Win32::GetLongPathName($filename_full_path);
1242 if (defined $long_filename) {
1243 $filename_full_path = $long_filename;
1244 }
1245 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1246 $filename_full_path =~ s/^(.):/\u$1:/;
1247 if ((defined $and_encode) && ($and_encode)) {
1248 $filename_full_path = encode("utf8",$filename_full_path);
1249 }
1250 }
1251
1252 return $filename_full_path;
1253}
1254
1255
1256sub downgrade_if_dos_filename
1257{
1258 my ($filename_full_path) = @_;
1259
1260 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1261 require Win32;
1262
1263 # Ensure the given long Windows filename is in a form that can
1264 # be opened by Perl => convert it to a short DOS-like filename
1265
1266 my $short_filename = Win32::GetShortPathName($filename_full_path);
1267 if (defined $short_filename) {
1268 $filename_full_path = $short_filename;
1269 }
1270 # Make sure initial drive letter is lower-case (to fit in
1271 # with rest of Greenstone)
1272 $filename_full_path =~ s/^(.):/\u$1:/;
1273 }
1274
1275 return $filename_full_path;
1276}
1277
1278sub block_filename
1279{
1280 my ($block_hash,$filename) = @_;
1281
1282 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1283
1284 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1285 my $lower_filename = lc($filename);
1286 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1287# my $lower_drive = $filename;
1288# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1289
1290# my $upper_drive = $filename;
1291# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1292#
1293# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1294# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
1295 }
1296 else {
1297 $block_hash->{'file_blocks'}->{$filename} = 1;
1298 }
1299}
1300
1301
1302sub filename_is_absolute
1303{
1304 my ($filename) = @_;
1305
1306 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1307 return ($filename =~ m/^(\w:)?\\/);
1308 }
1309 else {
1310 return ($filename =~ m/^\//);
1311 }
1312}
1313
1314
1315## @method make_absolute()
1316#
1317# Ensure the given file path is absolute in respect to the given base path.
1318#
1319# @param $base_dir A string denoting the base path the given dir must be
1320# absolute to.
1321# @param $dir The directory to be made absolute as a string. Note that the
1322# dir may already be absolute, in which case it will remain
1323# unchanged.
1324# @return The now absolute form of the directory as a string.
1325#
1326# @author John Thompson, DL Consulting Ltd.
1327# @copy 2006 DL Consulting Ltd.
1328#
1329#used in buildcol.pl, doesn't work for all cases --kjdon
1330sub make_absolute {
1331
1332 my ($base_dir, $dir) = @_;
1333### print STDERR "dir = $dir\n";
1334 $dir =~ s/[\\\/]+/\//g;
1335 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1336 $dir =~ s|^/tmp_mnt||;
1337 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1338 $dir =~ s|/[.][.]?/|/|g;
1339 $dir =~ tr|/|/|s;
1340### print STDERR "dir = $dir\n";
1341
1342 return $dir;
1343}
1344## make_absolute() ##
1345
1346sub get_dirsep {
1347
1348 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1349 return "\\";
1350 } else {
1351 return "\/";
1352 }
1353}
1354
1355sub get_os_dirsep {
1356
1357 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1358 return "\\\\";
1359 } else {
1360 return "\\\/";
1361 }
1362}
1363
1364sub get_re_dirsep {
1365
1366 return "\\\\|\\\/";
1367}
1368
1369
1370sub get_dirsep_tail {
1371 my ($filename) = @_;
1372
1373 # returns last part of directory or filename
1374 # On unix e.g. a/b.d => b.d
1375 # a/b/c => c
1376
1377 my $dirsep = get_re_dirsep();
1378 my @dirs = split (/$dirsep/, $filename);
1379 my $tail = pop @dirs;
1380
1381 # - caused problems under windows
1382 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1383
1384 return $tail;
1385}
1386
1387
1388# if this is running on windows we want binaries to end in
1389# .exe, otherwise they don't have to end in any extension
1390sub get_os_exe {
1391 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1392 return "";
1393}
1394
1395
1396# test to see whether this is a big or little endian machine
1397sub is_little_endian
1398{
1399 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1400 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1401 # Otherwise, it's little endian
1402
1403 #return 0 if $^O =~ /^darwin$/i;
1404 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1405
1406 # Going back to stating exactly whether the machine is little endian
1407 # or big endian, without any special case for Macs. Since for rata it comes
1408 # back with little endian and for shuttle with bigendian.
1409 return (ord(substr(pack("s",1), 0, 1)) == 1);
1410}
1411
1412
1413# will return the collection name if successful, "" otherwise
1414sub use_collection {
1415 my ($collection, $collectdir) = @_;
1416
1417 if (!defined $collectdir || $collectdir eq "") {
1418 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1419 }
1420
1421 # get and check the collection
1422 if (!defined($collection) || $collection eq "") {
1423 if (defined $ENV{'GSDLCOLLECTION'}) {
1424 $collection = $ENV{'GSDLCOLLECTION'};
1425 } else {
1426 print STDOUT "No collection specified\n";
1427 return "";
1428 }
1429 }
1430
1431 if ($collection eq "modelcol") {
1432 print STDOUT "You can't use modelcol.\n";
1433 return "";
1434 }
1435
1436 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1437 # are defined
1438 $ENV{'GSDLCOLLECTION'} = $collection;
1439 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1440
1441 # make sure this collection exists
1442 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1443 print STDOUT "Invalid collection ($collection).\n";
1444 return "";
1445 }
1446
1447 # everything is ready to go
1448 return $collection;
1449}
1450
1451sub get_current_collection_name {
1452 return $ENV{'GSDLCOLLECTION'};
1453}
1454
1455
1456# will return the collection name if successful, "" otherwise.
1457# Like use_collection (above) but for greenstone 3 (taking account of site level)
1458
1459sub use_site_collection {
1460 my ($site, $collection, $collectdir) = @_;
1461
1462 if (!defined $collectdir || $collectdir eq "") {
1463 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1464 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1465 }
1466
1467 # collectdir explicitly set by this point (using $site variable if required).
1468 # Can call "old" gsdl2 use_collection now.
1469
1470 return use_collection($collection,$collectdir);
1471}
1472
1473
1474
1475sub locate_config_file
1476{
1477 my ($file) = @_;
1478
1479 my $locations = locate_config_files($file);
1480
1481 return shift @$locations; # returns undef if 'locations' is empty
1482}
1483
1484
1485sub locate_config_files
1486{
1487 my ($file) = @_;
1488
1489 my @locations = ();
1490
1491 if (-e $file) {
1492 # Clearly specified (most likely full filename)
1493 # No need to hunt in 'etc' directories, return value unchanged
1494 push(@locations,$file);
1495 }
1496 else {
1497 # Check for collection specific one before looking in global GSDL 'etc'
1498 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1499 my $test_collect_etc_filename
1500 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1501
1502 if (-e $test_collect_etc_filename) {
1503 push(@locations,$test_collect_etc_filename);
1504 }
1505 }
1506 my $test_main_etc_filename
1507 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1508 if (-e $test_main_etc_filename) {
1509 push(@locations,$test_main_etc_filename);
1510 }
1511 }
1512
1513 return \@locations;
1514}
1515
1516
1517sub hyperlink_text
1518{
1519 my ($text) = @_;
1520
1521 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1522 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1523
1524 return $text;
1525}
1526
1527
1528# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1529# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1530sub is_dir_empty
1531{
1532 my ($path) = @_;
1533 opendir DIR, $path;
1534 while(my $entry = readdir DIR) {
1535 next if($entry =~ /^\.\.?$/);
1536 closedir DIR;
1537 return 0;
1538 }
1539 closedir DIR;
1540 return 1;
1541}
1542
1543# Returns the given filename converted using either URL encoding or base64
1544# encoding, as specified by $rename_method. If the given filename has no suffix
1545# (if it is just the tailname), then $no_suffix should be some defined value.
1546# rename_method can be url, none, base64
1547sub rename_file {
1548 my ($filename, $rename_method, $no_suffix) = @_;
1549
1550 if(!$filename) { # undefined or empty string
1551 return $filename;
1552 }
1553
1554 if (!$rename_method) {
1555 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1556 # Debugging information
1557 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1558 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1559 $rename_method = "url";
1560 } elsif($rename_method eq "none") {
1561 return $filename; # would have already been renamed
1562 }
1563
1564 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1565 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1566 ###$filename =~ s/ /_/g;
1567
1568 my ($tailname,$dirname,$suffix);
1569 if($no_suffix) { # given a tailname, no suffix
1570 ($tailname,$dirname) = File::Basename::fileparse($filename);
1571 }
1572 else {
1573 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1574 }
1575 if (!$suffix) {
1576 $suffix = "";
1577 }
1578 else {
1579 $suffix = lc($suffix);
1580 }
1581
1582 if ($rename_method eq "url") {
1583 $tailname = &unicode::url_encode($tailname);
1584 }
1585 elsif ($rename_method eq "base64") {
1586 $tailname = &unicode::base64_encode($tailname);
1587 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1588 }
1589
1590 $filename = "$tailname$suffix";
1591 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1592
1593 return $filename;
1594}
1595
1596
1597# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1598sub rename_ldb_or_bdb_file {
1599 my ($filename_no_ext) = @_;
1600
1601 my $new_filename = "$filename_no_ext.gdb";
1602 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1603 # try ldb
1604 my $old_filename = "$filename_no_ext.ldb";
1605
1606 if (-f $old_filename) {
1607 print STDERR "Renaming $old_filename to $new_filename\n";
1608 rename ($old_filename, $new_filename)
1609 || print STDERR "Rename failed: $!\n";
1610 return;
1611 }
1612 # try bdb
1613 $old_filename = "$filename_no_ext.bdb";
1614 if (-f $old_filename) {
1615 print STDERR "Renaming $old_filename to $new_filename\n";
1616 rename ($old_filename, $new_filename)
1617 || print STDERR "Rename failed: $!\n";
1618 return;
1619 }
1620}
1621
1622sub os_dir() {
1623
1624 my $gsdlarch = "";
1625 if(defined $ENV{'GSDLARCH'}) {
1626 $gsdlarch = $ENV{'GSDLARCH'};
1627 }
1628 return $ENV{'GSDLOS'}.$gsdlarch;
1629}
1630
1631# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1632# By default, /greenstone3 for GS3 or /greenstone for GS2.
1633sub get_greenstone_url_prefix() {
1634 # if already set on a previous occasion, just return that
1635 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1636 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1637
1638 my ($configfile, $urlprefix, $defaultUrlprefix);
1639 my @propertynames = ();
1640
1641 if($ENV{'GSDL3SRCHOME'}) {
1642 $defaultUrlprefix = "/greenstone3";
1643 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1644 push(@propertynames, qw/path\s*\=/);
1645 } else {
1646 $defaultUrlprefix = "/greenstone";
1647 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
1648 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1649 }
1650
1651 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1652
1653 if(!$urlprefix) { # no values found for URL prefix, use default values
1654 $urlprefix = $defaultUrlprefix;
1655 } else {
1656 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1657 $urlprefix =~ s/^\///; # remove the starting slash
1658 my @dirs = split(/(\\|\/)/, $urlprefix);
1659 $urlprefix = shift(@dirs);
1660
1661 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1662 $urlprefix = "/$urlprefix";
1663 }
1664 }
1665
1666 # set for the future
1667 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1668# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1669 return $urlprefix;
1670}
1671
1672
1673# Given a config file (xml or java properties file) and a list/array of regular expressions
1674# that represent property names to match on, this function will return the value for the 1st
1675# matching property name. If the return value is undefined, no matching property was found.
1676sub extract_propvalue_from_file() {
1677 my ($configfile, $propertynames) = @_;
1678
1679 my $value;
1680 unless(open(FIN, "<$configfile")) {
1681 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1682 return $value; # not initialised
1683 }
1684
1685 # Read the entire file at once, as one single line, then close it
1686 my $filecontents;
1687 {
1688 local $/ = undef;
1689 $filecontents = <FIN>;
1690 }
1691 close(FIN);
1692
1693 foreach my $regex (@$propertynames) {
1694 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1695 if($value) {
1696 $value =~ s/^\"//; # remove any startquotes
1697 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1698 last; # found value for a matching property, break from loop
1699 }
1700 }
1701
1702 return $value;
1703}
1704
1705# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1706# given that perllib is in @INC in order to invoke this subroutine.
1707# Call as follows -- after setting up INC to include perllib and
1708# after setting up GSDLHOME and GSDLOS:
1709#
1710# require util;
1711# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1712#
1713sub setup_greenstone_env() {
1714 my ($GSDLHOME, $GSDLOS) = @_;
1715
1716 #my %env_map = ();
1717 # Get the localised ENV settings of running a localised source setup.bash
1718 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1719 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1720 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
1721 if($GSDLOS =~ m/windows/i) {
1722 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1723 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1724 }
1725 if (!open(PIN, "$perl_command |")) {
1726 print STDERR ("Unable to execute command: $perl_command. $!\n");
1727 }
1728
1729 while (defined (my $perl_output_line = <PIN>)) {
1730 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1731 #$env_map{$key}=$value;
1732 $ENV{$key}=$value;
1733 }
1734 close (PIN);
1735
1736 # If any keys in $ENV don't occur in Greenstone's localised env
1737 # (stored in $env_map), delete those entries from $ENV
1738 #foreach $key (keys %ENV) {
1739 # if(!defined $env_map{$key}) {
1740 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1741 # delete $ENV{$key}; # del $ENV(key, value) pair
1742 # }
1743 #}
1744 #undef %env_map;
1745}
1746
1747sub get_perl_exec() {
1748 my $perl_exec = $^X; # may return just "perl"
1749
1750 if($ENV{'PERLPATH'}) {
1751 # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1752 if($ENV{'GSDLOS'} =~ m/windows/) {
1753 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1754 } else {
1755 $perl_exec = "$ENV{'PERLPATH'}/perl";
1756 }
1757 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1758 # containing the full path to the current perl executable we're using
1759 $perl_exec = $Config{perlpath}; # configured path for perl
1760 if (!-e $perl_exec) { # may not point to location on this machine
1761 $perl_exec = $^X; # may return just "perl"
1762 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1763 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1764 }
1765 }
1766 }
1767
1768 return $perl_exec;
1769}
1770
1771# returns the path to the java command in the JRE included with GS (if any),
1772# quoted to safeguard any spaces in this path, otherwise a simple java
1773# command is returned which assumes and will try for a system java.
1774sub get_java_command {
1775 my $java = "java";
1776 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1777 # after running setup.bat or from GLI which also runs setup.bat
1778 my $java_bin = &util::filename_cat($ENV{'GSDLHOME'},"packages","jre","bin");
1779 if(-d $java_bin) {
1780 $java = &util::filename_cat($java_bin,"java");
1781 $java = "\"".$java."\""; # quoted to preserve spaces in path
1782 }
1783 }
1784 return $java;
1785}
1786
1787
1788# Given the qualified collection name (colgroup/collection),
1789# returns the collection and colgroup parts
1790sub get_collection_parts {
1791 # http://perldoc.perl.org/File/Basename.html
1792 # my($filename, $directories, $suffix) = fileparse($path);
1793 # "$directories contains everything up to and including the last directory separator in the $path
1794 # including the volume (if applicable). The remainder of the $path is the $filename."
1795 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);
1796
1797 my $qualified_collection = shift(@_);
1798
1799 # Since activate.pl can be launched from the command-line, including by a user,
1800 # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1801 # Also allow for the accidental inclusion of multiple slashes
1802 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1803
1804 if(!defined $collection) {
1805 $collection = $colgroup;
1806 $colgroup = "";
1807 }
1808 return ($collection, $colgroup);
1809}
1810
1811# work out the "collectdir/collection" location
1812sub resolve_collection_dir {
1813 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1814
1815 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);
1816
1817 if (defined $collect_dir) {
1818 return &util::filename_cat($collect_dir,$colgroup, $collection);
1819 }
1820 else {
1821 if (defined $site) {
1822 return &util::filename_cat($ENV{'GSDL3HOME'},"sites",$site,"collect",$colgroup, $collection);
1823 }
1824 else {
1825 return &util::filename_cat($ENV{'GSDLHOME'},"collect",$colgroup, $collection);
1826 }
1827 }
1828}
1829
18301;
Note: See TracBrowser for help on using the repository browser.