source: main/trunk/greenstone2/perllib/util.pm@ 24932

Last change on this file since 24932 was 24932, checked in by ak19, 12 years ago

Diego noticed how the metadata in a toplevel metadata.xml, which specifies metadata for files in import's subfolders, does not get attached to the files on Windows, while this works on Linux. It had to do with the difference between the file slashes used on the OS versus the URL-type fileslashes used in the metadata.xml Diego had constructed. This has now been fixed and Dr Bainbridge came up with a tidier solution of a new method in util.pm that would handle the details.

  • Property svn:keywords set to Author Date Id Revision
File size: 46.5 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41 my (@files) = @_;
42
43 my @filefiles = ();
44
45 # make sure the files we want to delete exist
46 # and are regular files
47 foreach my $file (@files) {
48 if (!-e $file) {
49 print STDERR "util::rm $file does not exist\n";
50 } elsif ((!-f $file) && (!-l $file)) {
51 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52 } else {
53 push (@filefiles, $file);
54 }
55 }
56
57 # remove the files
58 my $numremoved = unlink @filefiles;
59
60 # check to make sure all of them were removed
61 if ($numremoved != scalar(@filefiles)) {
62 print STDERR "util::rm Not all files were removed\n";
63 }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine when debugging the deletion of individual files.
68sub rm_debug {
69 my (@files) = @_;
70 my @filefiles = ();
71
72 # make sure the files we want to delete exist
73 # and are regular files
74 foreach my $file (@files) {
75 if (!-e $file) {
76 print STDERR "util::rm $file does not exist\n";
77 } elsif ((!-f $file) && (!-l $file)) {
78 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79 } else { # debug message
80 unlink($file) or warn "Could not delete file $file: $!\n";
81 }
82 }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88 my ($files,$file_accept_re,$file_reject_re) = @_;
89
90# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92# print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93
94 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96 # recursively remove the files
97 foreach my $file (@files_array) {
98 $file =~ s/[\/\\]+$//; # remove trailing slashes
99
100 if (!-e $file) {
101 print STDERR "util::filtered_rm_r $file does not exist\n";
102
103 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104 # get the contents of this directory
105 if (!opendir (INDIR, $file)) {
106 print STDERR "util::filtered_rm_r could not open directory $file\n";
107 } else {
108 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109 closedir (INDIR);
110
111 # remove all the files in this directory
112 map {$_="$file/$_";} @filedir;
113 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115 if (!defined $file_accept_re && !defined $file_reject_re) {
116 # remove this directory
117 if (!rmdir $file) {
118 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119 }
120 }
121 }
122 } else {
123 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126 # remove this file
127 &rm ($file);
128 }
129 }
130 }
131}
132
133
134# recursive removal
135sub rm_r {
136 my (@files) = @_;
137
138 # use the more general (but reterospectively written function
139 # filtered_rm_r function()
140
141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149 my $dest = pop (@_);
150 my (@srcfiles) = @_;
151
152 # remove trailing slashes from source and destination files
153 $dest =~ s/[\\\/]+$//;
154 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156 # a few sanity checks
157 if (scalar (@srcfiles) == 0) {
158 print STDERR "util::mv no destination directory given\n";
159 return;
160 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161 print STDERR "util::mv if multiple source files are given the ".
162 "destination must be a directory\n";
163 return;
164 }
165
166 # move the files
167 foreach my $file (@srcfiles) {
168 my $tempdest = $dest;
169 if (-d $tempdest) {
170 my ($filename) = $file =~ /([^\\\/]+)$/;
171 $tempdest .= "/$filename";
172 }
173 if (!-e $file) {
174 print STDERR "util::mv $file does not exist\n";
175 } else {
176 rename ($file, $tempdest);
177 }
178 }
179}
180
181
182# copies a file or a group of files
183sub cp {
184 my $dest = pop (@_);
185 my (@srcfiles) = @_;
186
187 # remove trailing slashes from source and destination files
188 $dest =~ s/[\\\/]+$//;
189 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
190
191 # a few sanity checks
192 if (scalar (@srcfiles) == 0) {
193 print STDERR "util::cp no destination directory given\n";
194 return;
195 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
196 print STDERR "util::cp if multiple source files are given the ".
197 "destination must be a directory\n";
198 return;
199 }
200
201 # copy the files
202 foreach my $file (@srcfiles) {
203 my $tempdest = $dest;
204 if (-d $tempdest) {
205 my ($filename) = $file =~ /([^\\\/]+)$/;
206 $tempdest .= "/$filename";
207 }
208 if (!-e $file) {
209 print STDERR "util::cp $file does not exist\n";
210 } elsif (!-f $file) {
211 print STDERR "util::cp $file is not a plain file\n";
212 } else {
213 &File::Copy::copy ($file, $tempdest);
214 }
215 }
216}
217
218
219
220# recursively copies a file or group of files
221# syntax: cp_r (sourcefiles, destination directory)
222# destination must be a directory - to copy one file to
223# another use cp instead
224sub cp_r {
225 my $dest = pop (@_);
226 my (@srcfiles) = @_;
227
228 # a few sanity checks
229 if (scalar (@srcfiles) == 0) {
230 print STDERR "util::cp_r no destination directory given\n";
231 return;
232 } elsif (-f $dest) {
233 print STDERR "util::cp_r destination must be a directory\n";
234 return;
235 }
236
237 # create destination directory if it doesn't exist already
238 if (! -d $dest) {
239 my $store_umask = umask(0002);
240 mkdir ($dest, 0777);
241 umask($store_umask);
242 }
243
244 # copy the files
245 foreach my $file (@srcfiles) {
246
247 if (!-e $file) {
248 print STDERR "util::cp_r $file does not exist\n";
249
250 } elsif (-d $file) {
251 # make the new directory
252 my ($filename) = $file =~ /([^\\\/]*)$/;
253 $dest = &util::filename_cat ($dest, $filename);
254 my $store_umask = umask(0002);
255 mkdir ($dest, 0777);
256 umask($store_umask);
257
258 # get the contents of this directory
259 if (!opendir (INDIR, $file)) {
260 print STDERR "util::cp_r could not open directory $file\n";
261 } else {
262 my @filedir = readdir (INDIR);
263 closedir (INDIR);
264 foreach my $f (@filedir) {
265 next if $f =~ /^\.\.?$/;
266 # copy all the files in this directory
267 my $ff = &util::filename_cat ($file, $f);
268 &cp_r ($ff, $dest);
269 }
270 }
271
272 } else {
273 &cp($file, $dest);
274 }
275 }
276}
277# recursively copies a file or group of files
278# syntax: cp_r (sourcefiles, destination directory)
279# destination must be a directory - to copy one file to
280# another use cp instead
281sub cp_r_nosvn {
282 my $dest = pop (@_);
283 my (@srcfiles) = @_;
284
285 # a few sanity checks
286 if (scalar (@srcfiles) == 0) {
287 print STDERR "util::cp_r no destination directory given\n";
288 return;
289 } elsif (-f $dest) {
290 print STDERR "util::cp_r destination must be a directory\n";
291 return;
292 }
293
294 # create destination directory if it doesn't exist already
295 if (! -d $dest) {
296 my $store_umask = umask(0002);
297 mkdir ($dest, 0777);
298 umask($store_umask);
299 }
300
301 # copy the files
302 foreach my $file (@srcfiles) {
303
304 if (!-e $file) {
305 print STDERR "util::cp_r $file does not exist\n";
306
307 } elsif (-d $file) {
308 # make the new directory
309 my ($filename) = $file =~ /([^\\\/]*)$/;
310 $dest = &util::filename_cat ($dest, $filename);
311 my $store_umask = umask(0002);
312 mkdir ($dest, 0777);
313 umask($store_umask);
314
315 # get the contents of this directory
316 if (!opendir (INDIR, $file)) {
317 print STDERR "util::cp_r could not open directory $file\n";
318 } else {
319 my @filedir = readdir (INDIR);
320 closedir (INDIR);
321 foreach my $f (@filedir) {
322 next if $f =~ /^\.\.?$/;
323 next if $f =~ /^\.svn$/;
324 # copy all the files in this directory
325 my $ff = &util::filename_cat ($file, $f);
326 &cp_r ($ff, $dest);
327 }
328 }
329
330 } else {
331 &cp($file, $dest);
332 }
333 }
334}
335
336# copies a directory and its contents, excluding subdirectories, into a new directory
337sub cp_r_toplevel {
338 my $dest = pop (@_);
339 my (@srcfiles) = @_;
340
341 # a few sanity checks
342 if (scalar (@srcfiles) == 0) {
343 print STDERR "util::cp_r no destination directory given\n";
344 return;
345 } elsif (-f $dest) {
346 print STDERR "util::cp_r destination must be a directory\n";
347 return;
348 }
349
350 # create destination directory if it doesn't exist already
351 if (! -d $dest) {
352 my $store_umask = umask(0002);
353 mkdir ($dest, 0777);
354 umask($store_umask);
355 }
356
357 # copy the files
358 foreach my $file (@srcfiles) {
359
360 if (!-e $file) {
361 print STDERR "util::cp_r $file does not exist\n";
362
363 } elsif (-d $file) {
364 # make the new directory
365 my ($filename) = $file =~ /([^\\\/]*)$/;
366 $dest = &util::filename_cat ($dest, $filename);
367 my $store_umask = umask(0002);
368 mkdir ($dest, 0777);
369 umask($store_umask);
370
371 # get the contents of this directory
372 if (!opendir (INDIR, $file)) {
373 print STDERR "util::cp_r could not open directory $file\n";
374 } else {
375 my @filedir = readdir (INDIR);
376 closedir (INDIR);
377 foreach my $f (@filedir) {
378 next if $f =~ /^\.\.?$/;
379
380 # copy all the files in this directory, but not directories
381 my $ff = &util::filename_cat ($file, $f);
382 if (-f $ff) {
383 &cp($ff, $dest);
384 #&cp_r ($ff, $dest);
385 }
386 }
387 }
388
389 } else {
390 &cp($file, $dest);
391 }
392 }
393}
394
395sub mk_dir {
396 my ($dir) = @_;
397
398 my $store_umask = umask(0002);
399 my $mkdir_ok = mkdir ($dir, 0777);
400 umask($store_umask);
401
402 if (!$mkdir_ok)
403 {
404 print STDERR "util::mk_dir could not create directory $dir\n";
405 return;
406 }
407}
408
409# in case anyone cares - I did some testing (using perls Benchmark module)
410# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
411# slightly faster (surprisingly) - Stefan.
412sub mk_all_dir {
413 my ($dir) = @_;
414
415 # use / for the directory separator, remove duplicate and
416 # trailing slashes
417 $dir=~s/[\\\/]+/\//g;
418 $dir=~s/[\\\/]+$//;
419
420 # make sure the cache directory exists
421 my $dirsofar = "";
422 my $first = 1;
423 foreach my $dirname (split ("/", $dir)) {
424 $dirsofar .= "/" unless $first;
425 $first = 0;
426
427 $dirsofar .= $dirname;
428
429 next if $dirname =~ /^(|[a-z]:)$/i;
430 if (!-e $dirsofar)
431 {
432 my $store_umask = umask(0002);
433 my $mkdir_ok = mkdir ($dirsofar, 0777);
434 umask($store_umask);
435 if (!$mkdir_ok)
436 {
437 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
438 return;
439 }
440 }
441 }
442}
443
444# make hard link to file if supported by OS, otherwise copy the file
445sub hard_link {
446 my ($src, $dest, $verbosity) = @_;
447
448 # remove trailing slashes from source and destination files
449 $src =~ s/[\\\/]+$//;
450 $dest =~ s/[\\\/]+$//;
451
452## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
453 # a few sanity checks
454 if (-e $dest) {
455 # destination file already exists
456 return;
457 }
458 elsif (!-e $src) {
459 print STDERR "util::hard_link source file \"$src\" does not exist\n";
460 return 1;
461 }
462 elsif (-d $src) {
463 print STDERR "util::hard_link source \"$src\" is a directory\n";
464 return 1;
465 }
466
467 my $dest_dir = &File::Basename::dirname($dest);
468 mk_all_dir($dest_dir) if (!-e $dest_dir);
469
470
471 if (!link($src, $dest)) {
472 if ((!defined $verbosity) || ($verbosity>2)) {
473 print STDERR "util::hard_link: unable to create hard link. ";
474 print STDERR " Copying file: $src -> $dest\n";
475 }
476 &File::Copy::copy ($src, $dest);
477 }
478 return 0;
479}
480
481# make soft link to file if supported by OS, otherwise copy file
482sub soft_link {
483 my ($src, $dest, $ensure_paths_absolute) = @_;
484
485 # remove trailing slashes from source and destination files
486 $src =~ s/[\\\/]+$//;
487 $dest =~ s/[\\\/]+$//;
488
489 # Ensure file paths are absolute IF requested to do so
490 # Soft_linking didn't work for relative paths
491 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
492 # We need to ensure that the src file is the absolute path
493 # See http://perldoc.perl.org/File/Spec.html
494 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
495 $src = File::Spec->rel2abs($src); # make absolute
496 }
497 # Might as well ensure that the destination file's absolute path is used
498 if(!File::Spec->file_name_is_absolute( $dest )) {
499 $dest = File::Spec->rel2abs($dest); # make absolute
500 }
501 }
502
503 # a few sanity checks
504 if (!-e $src) {
505 print STDERR "util::soft_link source file $src does not exist\n";
506 return 0;
507 }
508
509 my $dest_dir = &File::Basename::dirname($dest);
510 mk_all_dir($dest_dir) if (!-e $dest_dir);
511
512 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
513
514 # symlink not supported on windows
515 &File::Copy::copy ($src, $dest);
516
517 } elsif (!eval {symlink($src, $dest)}) {
518 print STDERR "util::soft_link: unable to create soft link.\n";
519 return 0;
520 }
521
522 return 1;
523}
524
525# Primarily for filenames generated by processing
526# content of HTML files (which are mapped to UTF-8 internally)
527#
528# To turn this into an octet string that really exists on the file
529# system:
530# 1. don't need to do anything special for Unix-based systems
531# (as underlying file system is byte-code)
532# 2. need to map to short DOS filenames for Windows
533
534sub utf8_to_real_filename
535{
536 my ($utf8_filename) = @_;
537
538 my $real_filename;
539
540 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
541 require Win32;
542
543 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
544
545 my $unicode_filename = decode("utf8",$utf8_filename);
546 $real_filename = Win32::GetShortPathName($unicode_filename);
547 }
548 else {
549 $real_filename = $utf8_filename;
550 }
551
552 return $real_filename;
553}
554
555
556sub fd_exists
557{
558 my $filename_full_path = shift @_;
559 my $test_op = shift @_ || "-e";
560
561 # By default tests for existance of file or directory (-e)
562 # Can be made more specific by providing second parameter (e.g. -f or -d)
563
564 my $exists = 0;
565
566 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
567 require Win32;
568 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
569 if (!defined $filename_short_path) {
570 # Was probably still in UTF8 form (not what is needed on Windows)
571 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
572 if (defined $unicode_filename_full_path) {
573 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
574 }
575 }
576 $filename_full_path = $filename_short_path;
577 }
578
579 if (defined $filename_full_path) {
580 $exists = eval "($test_op \$filename_full_path)";
581 }
582
583 return $exists;
584}
585
586sub file_exists
587{
588 my ($filename_full_path) = @_;
589
590 return fd_exists($filename_full_path,"-f");
591}
592
593sub dir_exists
594{
595 my ($filename_full_path) = @_;
596
597 return fd_exists($filename_full_path,"-d");
598}
599
600
601
602# updates a copy of a directory in some other part of the filesystem
603# verbosity settings are: 0=low, 1=normal, 2=high
604# both $fromdir and $todir should be absolute paths
605sub cachedir {
606 my ($fromdir, $todir, $verbosity) = @_;
607 $verbosity = 1 unless defined $verbosity;
608
609 # use / for the directory separator, remove duplicate and
610 # trailing slashes
611 $fromdir=~s/[\\\/]+/\//g;
612 $fromdir=~s/[\\\/]+$//;
613 $todir=~s/[\\\/]+/\//g;
614 $todir=~s/[\\\/]+$//;
615
616 &mk_all_dir ($todir);
617
618 # get the directories in ascending order
619 if (!opendir (FROMDIR, $fromdir)) {
620 print STDERR "util::cachedir could not read directory $fromdir\n";
621 return;
622 }
623 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
624 closedir (FROMDIR);
625
626 if (!opendir (TODIR, $todir)) {
627 print STDERR "util::cacedir could not read directory $todir\n";
628 return;
629 }
630 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
631 closedir (TODIR);
632
633 my $fromi = 0;
634 my $toi = 0;
635
636 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
637# print "fromi: $fromi toi: $toi\n";
638
639 # see if we should delete a file/directory
640 # this should happen if the file/directory
641 # is not in the from list or if its a different
642 # size, or has an older timestamp
643 if ($toi < scalar(@todir)) {
644 if (($fromi >= scalar(@fromdir)) ||
645 ($todir[$toi] lt $fromdir[$fromi] ||
646 ($todir[$toi] eq $fromdir[$fromi] &&
647 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
648 $verbosity)))) {
649
650 # the files are different
651 &rm_r("$todir/$todir[$toi]");
652 splice(@todir, $toi, 1); # $toi stays the same
653
654 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
655 # the files are the same
656 # if it is a directory, check its contents
657 if (-d "$todir/$todir[$toi]") {
658 &cachedir ("$fromdir/$fromdir[$fromi]",
659 "$todir/$todir[$toi]", $verbosity);
660 }
661
662 $toi++;
663 $fromi++;
664 next;
665 }
666 }
667
668 # see if we should insert a file/directory
669 # we should insert a file/directory if there
670 # is no tofiles left or if the tofile does not exist
671 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
672 $todir[$toi] gt $fromdir[$fromi])) {
673 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
674 splice (@todir, $toi, 0, $fromdir[$fromi]);
675
676 $toi++;
677 $fromi++;
678 }
679 }
680}
681
682# this function returns -1 if either file is not found
683# assumes that $file1 and $file2 are absolute file names or
684# in the current directory
685# $file2 is allowed to be newer than $file1
686sub differentfiles {
687 my ($file1, $file2, $verbosity) = @_;
688 $verbosity = 1 unless defined $verbosity;
689
690 $file1 =~ s/\/+$//;
691 $file2 =~ s/\/+$//;
692
693 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
694 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
695
696 return -1 unless (-e $file1 && -e $file2);
697 if ($file1name ne $file2name) {
698 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
699 return 1;
700 }
701
702 my @file1stat = stat ($file1);
703 my @file2stat = stat ($file2);
704
705 if (-d $file1) {
706 if (! -d $file2) {
707 print STDERR "one file is a directory\n" if ($verbosity >= 2);
708 return 1;
709 }
710 return 0;
711 }
712
713 # both must be regular files
714 unless (-f $file1 && -f $file2) {
715 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
716 return 1;
717 }
718
719 # the size of the files must be the same
720 if ($file1stat[7] != $file2stat[7]) {
721 print STDERR "different sized files\n" if ($verbosity >= 2);
722 return 1;
723 }
724
725 # the second file cannot be older than the first
726 if ($file1stat[9] > $file2stat[9]) {
727 print STDERR "file is older\n" if ($verbosity >= 2);
728 return 1;
729 }
730
731 return 0;
732}
733
734
735sub get_tmp_filename
736{
737 my $file_ext = shift(@_) || undef;
738
739 my $opt_dot_file_ext = "";
740 if (defined $file_ext) {
741 if ($file_ext !~ m/\./) {
742 # no dot, so needs one added in at start
743 $opt_dot_file_ext = ".$file_ext"
744 }
745 else {
746 # allow for "extensions" such as _metadata.txt to be handled
747 # gracefully
748 $opt_dot_file_ext = $file_ext;
749 }
750 }
751
752 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
753 &mk_all_dir ($tmpdir) unless -e $tmpdir;
754
755 my $count = 1000;
756 my $rand = int(rand $count);
757 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
758
759 while (-e $full_tmp_filename) {
760 $rand = int(rand $count);
761 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
762 $count++;
763 }
764
765 return $full_tmp_filename;
766}
767
768sub get_timestamped_tmp_folder
769{
770
771 my $tmp_dirname;
772 if(defined $ENV{'GSDLCOLLECTDIR'}) {
773 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
774 } elsif(defined $ENV{'GSDLHOME'}) {
775 $tmp_dirname = $ENV{'GSDLHOME'};
776 } else {
777 return undef;
778 }
779
780 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
781 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
782
783 # add the timestamp into the path otherwise we can run into problems
784 # if documents have the same name
785 my $timestamp = time;
786 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
787 $tmp_dirname = $time_tmp_dirname;
788 my $i = 1;
789 while (-e $tmp_dirname) {
790 $tmp_dirname = "$time_tmp_dirname$i";
791 $i++;
792 }
793 &util::mk_dir($tmp_dirname);
794
795 return $tmp_dirname;
796}
797
798sub get_timestamped_tmp_filename_in_collection
799{
800
801 my ($input_filename, $output_ext) = @_;
802 # derive tmp filename from input filename
803 my ($tailname, $dirname, $suffix)
804 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
805
806 # softlink to collection tmp dir
807 my $tmp_dirname = &util::get_timestamped_tmp_folder();
808 $tmp_dirname = $dirname unless defined $tmp_dirname;
809
810 # following two steps copied from ConvertBinaryFile
811 # do we need them?? can't use them as is, as they use plugin methods.
812
813 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
814
815 # URLEncode this since htmls with images where the html filename is utf8 don't seem
816 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
817 # files on the filesystem.
818 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
819 if (defined $output_ext) {
820 $output_ext = ".$output_ext"; # add the dot
821 } else {
822 $output_ext = $suffix;
823 }
824 $output_ext= lc($output_ext);
825 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
826
827 return $tmp_filename;
828}
829
830sub get_toplevel_tmp_dir
831{
832 return filename_cat($ENV{'GSDLHOME'}, "tmp");
833}
834
835
836sub filename_to_regex {
837 my $filename = shift (@_);
838
839 # need to put single backslash back to double so that regex works
840 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
841
842 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
843 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
844 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
845 $filename =~ s@\(@\\(@g; # escape brackets
846 $filename =~ s@\)@\\)@g; # escape brackets
847 $filename =~ s@\[@\\[@g; # escape brackets
848 $filename =~ s@\]@\\]@g; # escape brackets
849
850 return $filename;
851}
852
853sub unregex_filename {
854 my $filename = shift (@_);
855
856 # need to put doubled backslashes for regex back to single
857 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
858 $filename =~ s/\\\./\./g; # remove RE syntax for .
859 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
860 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
861 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
862 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
863 return $filename;
864}
865
866sub filename_cat {
867 my $first_file = shift(@_);
868 my (@filenames) = @_;
869
870# Useful for debugging
871# -- might make sense to call caller(0) rather than (1)??
872# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
873# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
874
875 # If first_file is not null or empty, then add it back into the list
876 if (defined $first_file && $first_file =~ /\S/) {
877 unshift(@filenames, $first_file);
878 }
879
880 my $filename = join("/", @filenames);
881
882 # remove duplicate slashes and remove the last slash
883 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
884 $filename =~ s/[\\\/]+/\\/g;
885 } else {
886 $filename =~ s/[\/]+/\//g;
887 # DB: want a filename abc\de.html to remain like this
888 }
889 $filename =~ s/[\\\/]$//;
890
891 return $filename;
892}
893
894
895sub pathname_cat {
896 my $first_path = shift(@_);
897 my (@pathnames) = @_;
898
899 # If first_path is not null or empty, then add it back into the list
900 if (defined $first_path && $first_path =~ /\S/) {
901 unshift(@pathnames, $first_path);
902 }
903
904 my $join_char;
905 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
906 $join_char = ";";
907 } else {
908 $join_char = ":";
909 }
910
911 my $pathname = join($join_char, @pathnames);
912
913 # remove duplicate slashes
914 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
915 $pathname =~ s/[\\\/]+/\\/g;
916 } else {
917 $pathname =~ s/[\/]+/\//g;
918 # DB: want a pathname abc\de.html to remain like this
919 }
920
921 return $pathname;
922}
923
924
925sub tidy_up_oid {
926 my ($OID) = @_;
927 if ($OID =~ /\./) {
928 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
929 $OID =~ s/\.//g; #remove any periods
930 }
931 if ($OID =~ /^\s.*\s$/) {
932 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
933 # remove starting and trailing whitespace
934 $OID =~ s/^\s+//;
935 $OID =~ s/\s+$//;
936 }
937 if ($OID =~ /^[\d]*$/) {
938 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
939 $OID = "D" . $OID;
940 }
941
942 return $OID;
943}
944sub envvar_prepend {
945 my ($var,$val) = @_;
946
947 # do not prepend any value/path that's already in the environment variable
948
949 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
950 if (!defined($ENV{$var})) {
951 $ENV{$var} = "$val";
952 }
953 elsif($ENV{$var} !~ m/$escaped_val/) {
954 $ENV{$var} = "$val;".$ENV{$var};
955 }
956}
957
958sub envvar_append {
959 my ($var,$val) = @_;
960
961 # do not append any value/path that's already in the environment variable
962
963 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
964 if (!defined($ENV{$var})) {
965 $ENV{$var} = "$val";
966 }
967 elsif($ENV{$var} !~ m/$escaped_val/) {
968 $ENV{$var} .= ";$val";
969 }
970}
971
972
973# splits a filename into a prefix and a tail extension using the tail_re, or
974# if that fails, splits on the file_extension . (dot)
975sub get_prefix_and_tail_by_regex {
976
977 my ($filename,$tail_re) = @_;
978
979 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
980 if ((!defined $file_prefix) || (!defined $file_ext)) {
981 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
982 }
983
984 return ($file_prefix,$file_ext);
985}
986
987# get full path and file only path from a base_dir (which may be empty) and
988# file (which may contain directories)
989sub get_full_filenames {
990 my ($base_dir, $file) = @_;
991
992 my $filename_full_path = $file;
993 # add on directory if present
994 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
995
996 my $filename_no_path = $file;
997
998 # remove directory if present
999 $filename_no_path =~ s/^.*[\/\\]//;
1000 return ($filename_full_path, $filename_no_path);
1001}
1002
1003# returns the path of a file without the filename -- ie. the directory the file is in
1004sub filename_head {
1005 my $filename = shift(@_);
1006
1007 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1008 $filename =~ s/[^\\\\]*$//;
1009 }
1010 else {
1011 $filename =~ s/[^\\\/]*$//;
1012 }
1013
1014 return $filename;
1015}
1016
1017
1018
1019# returns 1 if filename1 and filename2 point to the same
1020# file or directory
1021sub filenames_equal {
1022 my ($filename1, $filename2) = @_;
1023
1024 # use filename_cat to clean up trailing slashes and
1025 # multiple slashes
1026 $filename1 = filename_cat ($filename1);
1027 $filename2 = filename_cat ($filename2);
1028
1029 # filenames not case sensitive on windows
1030 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1031 $filename1 =~ tr/[A-Z]/[a-z]/;
1032 $filename2 =~ tr/[A-Z]/[a-z]/;
1033 }
1034 return 1 if $filename1 eq $filename2;
1035 return 0;
1036}
1037
1038# If filename is relative to within_dir, returns the relative path of filename to that directory
1039# with slashes in the filename returned as they were in the original (absolute) filename.
1040sub filename_within_directory
1041{
1042 my ($filename,$within_dir) = @_;
1043
1044 if ($within_dir !~ m/[\/\\]$/) {
1045 my $dirsep = &util::get_dirsep();
1046 $within_dir .= $dirsep;
1047 }
1048
1049 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
1050 if ($filename =~ m/^$within_dir(.*)$/) {
1051 $filename = $1;
1052 }
1053
1054 return $filename;
1055}
1056
1057# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
1058# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
1059# The subpath returned will also be a URL type filename.
1060sub filename_within_directory_url_format
1061{
1062 my ($filename,$within_dir) = @_;
1063
1064 # convert parameters only to / slashes if Windows
1065
1066 my $filename_urlformat = $filename;
1067 my $within_dir_urlformat = $within_dir;
1068
1069 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1070 # Only need to worry about Windows, as Unix style directories already in url-format
1071 # Convert Windows style \ => /
1072 $filename_urlformat =~ s@\\@/@g;
1073 $within_dir_urlformat =~ s@\\@/@g;
1074 }
1075
1076 #if ($within_dir_urlformat !~ m/\/$/) {
1077 # make sure directory ends with a slash
1078 #$within_dir_urlformat .= "/";
1079 #}
1080
1081 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
1082
1083 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
1084
1085 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
1086 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
1087 $filename_urlformat = $1;
1088 }
1089
1090 return $filename_urlformat;
1091}
1092
1093
1094sub filename_within_collection
1095{
1096 my ($filename) = @_;
1097
1098 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1099
1100 if (defined $collect_dir) {
1101
1102 # if from within GSDLCOLLECTDIR, then remove directory prefix
1103 # so source_filename is realative to it. This is done to aid
1104 # portability, i.e. the collection can be moved to somewhere
1105 # else on the file system and the archives directory will still
1106 # work. This is needed, for example in the applet version of
1107 # GLI where GSDLHOME/collect on the server will be different to
1108 # the collect directory of the remove user. Of course,
1109 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1110 # it back into a full pathname.
1111
1112 $filename = filename_within_directory($filename,$collect_dir);
1113 }
1114
1115 return $filename;
1116}
1117
1118sub prettyprint_file
1119{
1120 my ($base_dir,$file,$gli) = @_;
1121
1122 my $filename_full_path = &util::filename_cat($base_dir,$file);
1123
1124 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1125 require Win32;
1126
1127 # For some reason base_dir in the form c:/a/b/c
1128 # This leads to confusion later on, so turn it back into
1129 # the more usual Windows form
1130 $base_dir =~ s/\//\\/g;
1131 my $long_base_dir = Win32::GetLongPathName($base_dir);
1132 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1133
1134 $file = filename_within_directory($long_full_path,$long_base_dir);
1135 $file = encode("utf8",$file) if ($gli);
1136 }
1137
1138 return $file;
1139}
1140
1141
1142sub upgrade_if_dos_filename
1143{
1144 my ($filename_full_path,$and_encode) = @_;
1145
1146 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1147 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1148 # to its long (Windows) version
1149 my $long_filename = Win32::GetLongPathName($filename_full_path);
1150 if (defined $long_filename) {
1151 $filename_full_path = $long_filename;
1152 }
1153 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1154 $filename_full_path =~ s/^(.):/\u$1:/;
1155 if ((defined $and_encode) && ($and_encode)) {
1156 $filename_full_path = encode("utf8",$filename_full_path);
1157 }
1158 }
1159
1160 return $filename_full_path;
1161}
1162
1163
1164sub downgrade_if_dos_filename
1165{
1166 my ($filename_full_path) = @_;
1167
1168 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1169 require Win32;
1170
1171 # Ensure the given long Windows filename is in a form that can
1172 # be opened by Perl => convert it to a short DOS-like filename
1173
1174 my $short_filename = Win32::GetShortPathName($filename_full_path);
1175 if (defined $short_filename) {
1176 $filename_full_path = $short_filename;
1177 }
1178 # Make sure initial drive letter is lower-case (to fit in
1179 # with rest of Greenstone)
1180 $filename_full_path =~ s/^(.):/\u$1:/;
1181 }
1182
1183 return $filename_full_path;
1184}
1185
1186sub block_filename
1187{
1188 my ($block_hash,$filename) = @_;
1189
1190 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1191
1192 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1193 my $lower_filename = lc($filename);
1194 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1195# my $lower_drive = $filename;
1196# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1197
1198# my $upper_drive = $filename;
1199# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1200#
1201# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1202# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
1203 }
1204 else {
1205 $block_hash->{'file_blocks'}->{$filename} = 1;
1206 }
1207}
1208
1209
1210sub filename_is_absolute
1211{
1212 my ($filename) = @_;
1213
1214 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1215 return ($filename =~ m/^(\w:)?\\/);
1216 }
1217 else {
1218 return ($filename =~ m/^\//);
1219 }
1220}
1221
1222
1223## @method make_absolute()
1224#
1225# Ensure the given file path is absolute in respect to the given base path.
1226#
1227# @param $base_dir A string denoting the base path the given dir must be
1228# absolute to.
1229# @param $dir The directory to be made absolute as a string. Note that the
1230# dir may already be absolute, in which case it will remain
1231# unchanged.
1232# @return The now absolute form of the directory as a string.
1233#
1234# @author John Thompson, DL Consulting Ltd.
1235# @copy 2006 DL Consulting Ltd.
1236#
1237#used in buildcol.pl, doesn't work for all cases --kjdon
1238sub make_absolute {
1239
1240 my ($base_dir, $dir) = @_;
1241### print STDERR "dir = $dir\n";
1242 $dir =~ s/[\\\/]+/\//g;
1243 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1244 $dir =~ s|^/tmp_mnt||;
1245 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1246 $dir =~ s|/[.][.]?/|/|g;
1247 $dir =~ tr|/|/|s;
1248### print STDERR "dir = $dir\n";
1249
1250 return $dir;
1251}
1252## make_absolute() ##
1253
1254sub get_dirsep {
1255
1256 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1257 return "\\";
1258 } else {
1259 return "\/";
1260 }
1261}
1262
1263sub get_os_dirsep {
1264
1265 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1266 return "\\\\";
1267 } else {
1268 return "\\\/";
1269 }
1270}
1271
1272sub get_re_dirsep {
1273
1274 return "\\\\|\\\/";
1275}
1276
1277
1278sub get_dirsep_tail {
1279 my ($filename) = @_;
1280
1281 # returns last part of directory or filename
1282 # On unix e.g. a/b.d => b.d
1283 # a/b/c => c
1284
1285 my $dirsep = get_re_dirsep();
1286 my @dirs = split (/$dirsep/, $filename);
1287 my $tail = pop @dirs;
1288
1289 # - caused problems under windows
1290 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1291
1292 return $tail;
1293}
1294
1295
1296# if this is running on windows we want binaries to end in
1297# .exe, otherwise they don't have to end in any extension
1298sub get_os_exe {
1299 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1300 return "";
1301}
1302
1303
1304# test to see whether this is a big or little endian machine
1305sub is_little_endian
1306{
1307 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1308 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1309 # Otherwise, it's little endian
1310
1311 #return 0 if $^O =~ /^darwin$/i;
1312 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1313
1314 # Going back to stating exactly whether the machine is little endian
1315 # or big endian, without any special case for Macs. Since for rata it comes
1316 # back with little endian and for shuttle with bigendian.
1317 return (ord(substr(pack("s",1), 0, 1)) == 1);
1318}
1319
1320
1321# will return the collection name if successful, "" otherwise
1322sub use_collection {
1323 my ($collection, $collectdir) = @_;
1324
1325 if (!defined $collectdir || $collectdir eq "") {
1326 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1327 }
1328
1329 # get and check the collection
1330 if (!defined($collection) || $collection eq "") {
1331 if (defined $ENV{'GSDLCOLLECTION'}) {
1332 $collection = $ENV{'GSDLCOLLECTION'};
1333 } else {
1334 print STDOUT "No collection specified\n";
1335 return "";
1336 }
1337 }
1338
1339 if ($collection eq "modelcol") {
1340 print STDOUT "You can't use modelcol.\n";
1341 return "";
1342 }
1343
1344 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1345 # are defined
1346 $ENV{'GSDLCOLLECTION'} = $collection;
1347 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1348
1349 # make sure this collection exists
1350 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1351 print STDOUT "Invalid collection ($collection).\n";
1352 return "";
1353 }
1354
1355 # everything is ready to go
1356 return $collection;
1357}
1358
1359sub get_current_collection_name {
1360 return $ENV{'GSDLCOLLECTION'};
1361}
1362
1363
1364# will return the collection name if successful, "" otherwise.
1365# Like use_collection (above) but for greenstone 3 (taking account of site level)
1366
1367sub use_site_collection {
1368 my ($site, $collection, $collectdir) = @_;
1369
1370 if (!defined $collectdir || $collectdir eq "") {
1371 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1372 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1373 }
1374
1375 # collectdir explicitly set by this point (using $site variable if required).
1376 # Can call "old" gsdl2 use_collection now.
1377
1378 return use_collection($collection,$collectdir);
1379}
1380
1381
1382
1383sub locate_config_file
1384{
1385 my ($file) = @_;
1386
1387 my $locations = locate_config_files($file);
1388
1389 return shift @$locations; # returns undef if 'locations' is empty
1390}
1391
1392
1393sub locate_config_files
1394{
1395 my ($file) = @_;
1396
1397 my @locations = ();
1398
1399 if (-e $file) {
1400 # Clearly specified (most likely full filename)
1401 # No need to hunt in 'etc' directories, return value unchanged
1402 push(@locations,$file);
1403 }
1404 else {
1405 # Check for collection specific one before looking in global GSDL 'etc'
1406 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1407 my $test_collect_etc_filename
1408 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1409
1410 if (-e $test_collect_etc_filename) {
1411 push(@locations,$test_collect_etc_filename);
1412 }
1413 }
1414 my $test_main_etc_filename
1415 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1416 if (-e $test_main_etc_filename) {
1417 push(@locations,$test_main_etc_filename);
1418 }
1419 }
1420
1421 return \@locations;
1422}
1423
1424
1425sub hyperlink_text
1426{
1427 my ($text) = @_;
1428
1429 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1430 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1431
1432 return $text;
1433}
1434
1435
1436# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1437# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1438sub is_dir_empty
1439{
1440 my ($path) = @_;
1441 opendir DIR, $path;
1442 while(my $entry = readdir DIR) {
1443 next if($entry =~ /^\.\.?$/);
1444 closedir DIR;
1445 return 0;
1446 }
1447 closedir DIR;
1448 return 1;
1449}
1450
1451# Returns the given filename converted using either URL encoding or base64
1452# encoding, as specified by $rename_method. If the given filename has no suffix
1453# (if it is just the tailname), then $no_suffix should be some defined value.
1454# rename_method can be url, none, base64
1455sub rename_file {
1456 my ($filename, $rename_method, $no_suffix) = @_;
1457
1458 if(!$filename) { # undefined or empty string
1459 return $filename;
1460 }
1461
1462 if (!$rename_method) {
1463 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1464 # Debugging information
1465 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1466 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1467 $rename_method = "url";
1468 } elsif($rename_method eq "none") {
1469 return $filename; # would have already been renamed
1470 }
1471
1472 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1473 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1474 ###$filename =~ s/ /_/g;
1475
1476 my ($tailname,$dirname,$suffix);
1477 if($no_suffix) { # given a tailname, no suffix
1478 ($tailname,$dirname) = File::Basename::fileparse($filename);
1479 }
1480 else {
1481 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1482 }
1483 if (!$suffix) {
1484 $suffix = "";
1485 }
1486 else {
1487 $suffix = lc($suffix);
1488 }
1489
1490 if ($rename_method eq "url") {
1491 $tailname = &unicode::url_encode($tailname);
1492 }
1493 elsif ($rename_method eq "base64") {
1494 $tailname = &unicode::base64_encode($tailname);
1495 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1496 }
1497
1498 $filename = "$tailname$suffix";
1499 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1500
1501 return $filename;
1502}
1503
1504
1505# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1506sub rename_ldb_or_bdb_file {
1507 my ($filename_no_ext) = @_;
1508
1509 my $new_filename = "$filename_no_ext.gdb";
1510 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1511 # try ldb
1512 my $old_filename = "$filename_no_ext.ldb";
1513
1514 if (-f $old_filename) {
1515 print STDERR "Renaming $old_filename to $new_filename\n";
1516 rename ($old_filename, $new_filename)
1517 || print STDERR "Rename failed: $!\n";
1518 return;
1519 }
1520 # try bdb
1521 $old_filename = "$filename_no_ext.bdb";
1522 if (-f $old_filename) {
1523 print STDERR "Renaming $old_filename to $new_filename\n";
1524 rename ($old_filename, $new_filename)
1525 || print STDERR "Rename failed: $!\n";
1526 return;
1527 }
1528}
1529
1530sub os_dir() {
1531
1532 my $gsdlarch = "";
1533 if(defined $ENV{'GSDLARCH'}) {
1534 $gsdlarch = $ENV{'GSDLARCH'};
1535 }
1536 return $ENV{'GSDLOS'}.$gsdlarch;
1537}
1538
1539# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1540# By default, /greenstone3 for GS3 or /greenstone for GS2.
1541sub get_greenstone_url_prefix() {
1542 # if already set on a previous occasion, just return that
1543 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1544 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1545
1546 my ($configfile, $urlprefix, $defaultUrlprefix);
1547 my @propertynames = ();
1548
1549 if($ENV{'GSDL3SRCHOME'}) {
1550 $defaultUrlprefix = "/greenstone3";
1551 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1552 push(@propertynames, qw/path\s*\=/);
1553 } else {
1554 $defaultUrlprefix = "/greenstone";
1555 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
1556 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1557 }
1558
1559 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1560
1561 if(!$urlprefix) { # no values found for URL prefix, use default values
1562 $urlprefix = $defaultUrlprefix;
1563 } else {
1564 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1565 $urlprefix =~ s/^\///; # remove the starting slash
1566 my @dirs = split(/(\\|\/)/, $urlprefix);
1567 $urlprefix = shift(@dirs);
1568
1569 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1570 $urlprefix = "/$urlprefix";
1571 }
1572 }
1573
1574 # set for the future
1575 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1576# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1577 return $urlprefix;
1578}
1579
1580
1581# Given a config file (xml or java properties file) and a list/array of regular expressions
1582# that represent property names to match on, this function will return the value for the 1st
1583# matching property name. If the return value is undefined, no matching property was found.
1584sub extract_propvalue_from_file() {
1585 my ($configfile, $propertynames) = @_;
1586
1587 my $value;
1588 unless(open(FIN, "<$configfile")) {
1589 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1590 return $value; # not initialised
1591 }
1592
1593 # Read the entire file at once, as one single line, then close it
1594 my $filecontents;
1595 {
1596 local $/ = undef;
1597 $filecontents = <FIN>;
1598 }
1599 close(FIN);
1600
1601 foreach my $regex (@$propertynames) {
1602 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1603 if($value) {
1604 $value =~ s/^\"//; # remove any startquotes
1605 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1606 last; # found value for a matching property, break from loop
1607 }
1608 }
1609
1610 return $value;
1611}
1612
1613# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1614# given that perllib is in @INC in order to invoke this subroutine.
1615# Call as follows -- after setting up INC to include perllib and
1616# after setting up GSDLHOME and GSDLOS:
1617#
1618# require util;
1619# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1620#
1621sub setup_greenstone_env() {
1622 my ($GSDLHOME, $GSDLOS) = @_;
1623
1624 #my %env_map = ();
1625 # Get the localised ENV settings of running a localised source setup.bash
1626 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1627 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1628 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
1629 if($GSDLOS =~ m/windows/i) {
1630 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1631 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1632 }
1633 if (!open(PIN, "$perl_command |")) {
1634 print STDERR ("Unable to execute command: $perl_command. $!\n");
1635 }
1636
1637 while (defined (my $perl_output_line = <PIN>)) {
1638 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1639 #$env_map{$key}=$value;
1640 $ENV{$key}=$value;
1641 }
1642 close (PIN);
1643
1644 # If any keys in $ENV don't occur in Greenstone's localised env
1645 # (stored in $env_map), delete those entries from $ENV
1646 #foreach $key (keys %ENV) {
1647 # if(!defined $env_map{$key}) {
1648 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1649 # delete $ENV{$key}; # del $ENV(key, value) pair
1650 # }
1651 #}
1652 #undef %env_map;
1653}
1654
1655sub get_perl_exec() {
1656 my $perl_exec = $^X; # may return just "perl"
1657
1658 if($ENV{'PERLPATH'}) {
1659 # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1660 if($ENV{'GSDLOS'} =~ m/windows/) {
1661 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1662 } else {
1663 $perl_exec = "$ENV{'PERLPATH'}/perl";
1664 }
1665 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1666 # containing the full path to the current perl executable we're using
1667 $perl_exec = $Config{perlpath}; # configured path for perl
1668 if (!-e $perl_exec) { # may not point to location on this machine
1669 $perl_exec = $^X; # may return just "perl"
1670 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1671 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1672 }
1673 }
1674 }
1675
1676 return $perl_exec;
1677}
1678
1679
16801;
Note: See TracBrowser for help on using the repository browser.