source: main/trunk/greenstone2/perllib/util.pm@ 24829

Last change on this file since 24829 was 24829, checked in by ak19, 12 years ago

Changes to bat files and perl code to deal with brackets in (Windows) filepath. Also checked winmake.bat files to see if changes were needed there. These changes go together with the commits 24826 to 24828 for gems.bat, and commit 24820 on makegs2.bat.

  • Property svn:keywords set to Author Date Id Revision
File size: 45.0 KB
RevLine 
[537]1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package util;
27
[23362]28use strict;
29
30use Encode;
[4]31use File::Copy;
[619]32use File::Basename;
[24362]33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
[4]38
39# removes files (but not directories)
40sub rm {
41 my (@files) = @_;
[18469]42
[4]43 my @filefiles = ();
44
45 # make sure the files we want to delete exist
46 # and are regular files
[10046]47 foreach my $file (@files) {
[4]48 if (!-e $file) {
49 print STDERR "util::rm $file does not exist\n";
[721]50 } elsif ((!-f $file) && (!-l $file)) {
51 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
[4]52 } else {
53 push (@filefiles, $file);
54 }
55 }
56
57 # remove the files
58 my $numremoved = unlink @filefiles;
59
60 # check to make sure all of them were removed
61 if ($numremoved != scalar(@filefiles)) {
62 print STDERR "util::rm Not all files were removed\n";
63 }
64}
65
[23249]66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine when debugging the deletion of individual files.
68sub rm_debug {
69 my (@files) = @_;
70 my @filefiles = ();
[4]71
[23249]72 # make sure the files we want to delete exist
73 # and are regular files
74 foreach my $file (@files) {
75 if (!-e $file) {
76 print STDERR "util::rm $file does not exist\n";
77 } elsif ((!-f $file) && (!-l $file)) {
78 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79 } else { # debug message
80 unlink($file) or warn "Could not delete file $file: $!\n";
81 }
82 }
83}
[10211]84
[23249]85
[4]86# recursive removal
[10211]87sub filtered_rm_r {
88 my ($files,$file_accept_re,$file_reject_re) = @_;
[4]89
[24291]90# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92# print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93
[10211]94 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
[4]96 # recursively remove the files
[10211]97 foreach my $file (@files_array) {
[4]98 $file =~ s/[\/\\]+$//; # remove trailing slashes
99
100 if (!-e $file) {
[10211]101 print STDERR "util::filtered_rm_r $file does not exist\n";
[4]102
[721]103 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
[4]104 # get the contents of this directory
105 if (!opendir (INDIR, $file)) {
[10211]106 print STDERR "util::filtered_rm_r could not open directory $file\n";
[4]107 } else {
108 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109 closedir (INDIR);
[10211]110
[4]111 # remove all the files in this directory
[10211]112 map {$_="$file/$_";} @filedir;
113 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
[4]114
[10211]115 if (!defined $file_accept_re && !defined $file_reject_re) {
116 # remove this directory
117 if (!rmdir $file) {
118 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119 }
[4]120 }
121 }
[10211]122 } else {
123 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
[4]124
[10211]125 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126 # remove this file
127 &rm ($file);
128 }
[4]129 }
130 }
131}
132
[10211]133
134# recursive removal
135sub rm_r {
136 my (@files) = @_;
137
138 # use the more general (but reterospectively written function
139 # filtered_rm_r function()
140
141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
[721]147# moves a file or a group of files
148sub mv {
149 my $dest = pop (@_);
150 my (@srcfiles) = @_;
[4]151
[721]152 # remove trailing slashes from source and destination files
153 $dest =~ s/[\\\/]+$//;
154 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156 # a few sanity checks
157 if (scalar (@srcfiles) == 0) {
158 print STDERR "util::mv no destination directory given\n";
159 return;
160 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161 print STDERR "util::mv if multiple source files are given the ".
162 "destination must be a directory\n";
163 return;
164 }
165
166 # move the files
[8716]167 foreach my $file (@srcfiles) {
[721]168 my $tempdest = $dest;
169 if (-d $tempdest) {
170 my ($filename) = $file =~ /([^\\\/]+)$/;
171 $tempdest .= "/$filename";
172 }
173 if (!-e $file) {
174 print STDERR "util::mv $file does not exist\n";
175 } else {
176 rename ($file, $tempdest);
177 }
178 }
179}
180
181
[4]182# copies a file or a group of files
183sub cp {
184 my $dest = pop (@_);
185 my (@srcfiles) = @_;
186
187 # remove trailing slashes from source and destination files
188 $dest =~ s/[\\\/]+$//;
189 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
190
191 # a few sanity checks
192 if (scalar (@srcfiles) == 0) {
193 print STDERR "util::cp no destination directory given\n";
194 return;
195 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
196 print STDERR "util::cp if multiple source files are given the ".
197 "destination must be a directory\n";
198 return;
199 }
200
201 # copy the files
[8716]202 foreach my $file (@srcfiles) {
[4]203 my $tempdest = $dest;
204 if (-d $tempdest) {
205 my ($filename) = $file =~ /([^\\\/]+)$/;
206 $tempdest .= "/$filename";
207 }
208 if (!-e $file) {
209 print STDERR "util::cp $file does not exist\n";
210 } elsif (!-f $file) {
211 print STDERR "util::cp $file is not a plain file\n";
212 } else {
213 &File::Copy::copy ($file, $tempdest);
214 }
215 }
216}
217
218
[721]219
[4]220# recursively copies a file or group of files
[1454]221# syntax: cp_r (sourcefiles, destination directory)
222# destination must be a directory - to copy one file to
223# another use cp instead
[4]224sub cp_r {
225 my $dest = pop (@_);
226 my (@srcfiles) = @_;
227
228 # a few sanity checks
229 if (scalar (@srcfiles) == 0) {
[1454]230 print STDERR "util::cp_r no destination directory given\n";
[4]231 return;
[1454]232 } elsif (-f $dest) {
233 print STDERR "util::cp_r destination must be a directory\n";
[4]234 return;
235 }
236
[1454]237 # create destination directory if it doesn't exist already
238 if (! -d $dest) {
239 my $store_umask = umask(0002);
240 mkdir ($dest, 0777);
241 umask($store_umask);
242 }
243
[4]244 # copy the files
[8716]245 foreach my $file (@srcfiles) {
[4]246
247 if (!-e $file) {
[1454]248 print STDERR "util::cp_r $file does not exist\n";
[4]249
250 } elsif (-d $file) {
[1586]251 # make the new directory
252 my ($filename) = $file =~ /([^\\\/]*)$/;
253 $dest = &util::filename_cat ($dest, $filename);
254 my $store_umask = umask(0002);
255 mkdir ($dest, 0777);
256 umask($store_umask);
[836]257
[4]258 # get the contents of this directory
259 if (!opendir (INDIR, $file)) {
260 print STDERR "util::cp_r could not open directory $file\n";
261 } else {
[1454]262 my @filedir = readdir (INDIR);
[4]263 closedir (INDIR);
[8716]264 foreach my $f (@filedir) {
[1454]265 next if $f =~ /^\.\.?$/;
266 # copy all the files in this directory
267 my $ff = &util::filename_cat ($file, $f);
268 &cp_r ($ff, $dest);
269 }
[4]270 }
271
272 } else {
[1454]273 &cp($file, $dest);
[4]274 }
275 }
276}
[21762]277# recursively copies a file or group of files
278# syntax: cp_r (sourcefiles, destination directory)
279# destination must be a directory - to copy one file to
280# another use cp instead
281sub cp_r_nosvn {
282 my $dest = pop (@_);
283 my (@srcfiles) = @_;
[4]284
[21762]285 # a few sanity checks
286 if (scalar (@srcfiles) == 0) {
287 print STDERR "util::cp_r no destination directory given\n";
288 return;
289 } elsif (-f $dest) {
290 print STDERR "util::cp_r destination must be a directory\n";
291 return;
292 }
293
294 # create destination directory if it doesn't exist already
295 if (! -d $dest) {
296 my $store_umask = umask(0002);
297 mkdir ($dest, 0777);
298 umask($store_umask);
299 }
300
301 # copy the files
302 foreach my $file (@srcfiles) {
303
304 if (!-e $file) {
305 print STDERR "util::cp_r $file does not exist\n";
306
307 } elsif (-d $file) {
308 # make the new directory
309 my ($filename) = $file =~ /([^\\\/]*)$/;
310 $dest = &util::filename_cat ($dest, $filename);
311 my $store_umask = umask(0002);
312 mkdir ($dest, 0777);
313 umask($store_umask);
314
315 # get the contents of this directory
316 if (!opendir (INDIR, $file)) {
317 print STDERR "util::cp_r could not open directory $file\n";
318 } else {
319 my @filedir = readdir (INDIR);
320 closedir (INDIR);
321 foreach my $f (@filedir) {
322 next if $f =~ /^\.\.?$/;
323 next if $f =~ /^\.svn$/;
324 # copy all the files in this directory
325 my $ff = &util::filename_cat ($file, $f);
326 &cp_r ($ff, $dest);
327 }
328 }
329
330 } else {
331 &cp($file, $dest);
332 }
333 }
334}
335
[11179]336# copies a directory and its contents, excluding subdirectories, into a new directory
337sub cp_r_toplevel {
338 my $dest = pop (@_);
339 my (@srcfiles) = @_;
[4]340
[11179]341 # a few sanity checks
342 if (scalar (@srcfiles) == 0) {
343 print STDERR "util::cp_r no destination directory given\n";
344 return;
345 } elsif (-f $dest) {
346 print STDERR "util::cp_r destination must be a directory\n";
347 return;
348 }
349
350 # create destination directory if it doesn't exist already
351 if (! -d $dest) {
352 my $store_umask = umask(0002);
353 mkdir ($dest, 0777);
354 umask($store_umask);
355 }
356
357 # copy the files
358 foreach my $file (@srcfiles) {
359
360 if (!-e $file) {
361 print STDERR "util::cp_r $file does not exist\n";
362
363 } elsif (-d $file) {
364 # make the new directory
365 my ($filename) = $file =~ /([^\\\/]*)$/;
366 $dest = &util::filename_cat ($dest, $filename);
367 my $store_umask = umask(0002);
368 mkdir ($dest, 0777);
369 umask($store_umask);
370
371 # get the contents of this directory
372 if (!opendir (INDIR, $file)) {
373 print STDERR "util::cp_r could not open directory $file\n";
374 } else {
375 my @filedir = readdir (INDIR);
376 closedir (INDIR);
377 foreach my $f (@filedir) {
378 next if $f =~ /^\.\.?$/;
379
380 # copy all the files in this directory, but not directories
381 my $ff = &util::filename_cat ($file, $f);
382 if (-f $ff) {
383 &cp($ff, $dest);
384 #&cp_r ($ff, $dest);
385 }
386 }
387 }
388
389 } else {
390 &cp($file, $dest);
391 }
392 }
393}
394
[721]395sub mk_dir {
396 my ($dir) = @_;
397
[836]398 my $store_umask = umask(0002);
399 my $mkdir_ok = mkdir ($dir, 0777);
400 umask($store_umask);
401
402 if (!$mkdir_ok)
403 {
[721]404 print STDERR "util::mk_dir could not create directory $dir\n";
405 return;
406 }
407}
408
[1046]409# in case anyone cares - I did some testing (using perls Benchmark module)
410# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
411# slightly faster (surprisingly) - Stefan.
[4]412sub mk_all_dir {
413 my ($dir) = @_;
414
415 # use / for the directory separator, remove duplicate and
416 # trailing slashes
417 $dir=~s/[\\\/]+/\//g;
418 $dir=~s/[\\\/]+$//;
419
420 # make sure the cache directory exists
421 my $dirsofar = "";
422 my $first = 1;
[8716]423 foreach my $dirname (split ("/", $dir)) {
[4]424 $dirsofar .= "/" unless $first;
425 $first = 0;
426
427 $dirsofar .= $dirname;
428
429 next if $dirname =~ /^(|[a-z]:)$/i;
[836]430 if (!-e $dirsofar)
431 {
432 my $store_umask = umask(0002);
433 my $mkdir_ok = mkdir ($dirsofar, 0777);
434 umask($store_umask);
435 if (!$mkdir_ok)
436 {
437 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
438 return;
439 }
440 }
[4]441 }
442}
443
[619]444# make hard link to file if supported by OS, otherwise copy the file
445sub hard_link {
[18463]446 my ($src, $dest, $verbosity) = @_;
[4]447
[619]448 # remove trailing slashes from source and destination files
449 $src =~ s/[\\\/]+$//;
450 $dest =~ s/[\\\/]+$//;
451
[23307]452## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
[619]453 # a few sanity checks
[812]454 if (-e $dest) {
455 # destination file already exists
456 return;
457 }
458 elsif (!-e $src) {
[23307]459 print STDERR "util::hard_link source file \"$src\" does not exist\n";
[3628]460 return 1;
[619]461 }
462 elsif (-d $src) {
[23307]463 print STDERR "util::hard_link source \"$src\" is a directory\n";
[3628]464 return 1;
[619]465 }
466
467 my $dest_dir = &File::Basename::dirname($dest);
468 mk_all_dir($dest_dir) if (!-e $dest_dir);
469
[14365]470
[22119]471 if (!link($src, $dest)) {
[18463]472 if ((!defined $verbosity) || ($verbosity>2)) {
473 print STDERR "util::hard_link: unable to create hard link. ";
474 print STDERR " Copying file: $src -> $dest\n";
475 }
[14365]476 &File::Copy::copy ($src, $dest);
[619]477 }
[3628]478 return 0;
[619]479}
480
[2193]481# make soft link to file if supported by OS, otherwise copy file
[721]482sub soft_link {
[15165]483 my ($src, $dest, $ensure_paths_absolute) = @_;
[619]484
[721]485 # remove trailing slashes from source and destination files
486 $src =~ s/[\\\/]+$//;
487 $dest =~ s/[\\\/]+$//;
[619]488
[15165]489 # Ensure file paths are absolute IF requested to do so
490 # Soft_linking didn't work for relative paths
491 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
492 # We need to ensure that the src file is the absolute path
493 # See http://perldoc.perl.org/File/Spec.html
494 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
495 $src = File::Spec->rel2abs($src); # make absolute
496 }
497 # Might as well ensure that the destination file's absolute path is used
498 if(!File::Spec->file_name_is_absolute( $dest )) {
499 $dest = File::Spec->rel2abs($dest); # make absolute
500 }
501 }
502
[721]503 # a few sanity checks
504 if (!-e $src) {
505 print STDERR "util::soft_link source file $src does not exist\n";
506 return 0;
507 }
[619]508
[721]509 my $dest_dir = &File::Basename::dirname($dest);
510 mk_all_dir($dest_dir) if (!-e $dest_dir);
[14365]511
[2193]512 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
[23484]513
[14365]514 # symlink not supported on windows
515 &File::Copy::copy ($src, $dest);
[2193]516
517 } elsif (!eval {symlink($src, $dest)}) {
[2974]518 print STDERR "util::soft_link: unable to create soft link.\n";
[721]519 return 0;
520 }
521
522 return 1;
523}
524
[23362]525# Primarily for filenames generated by processing
526# content of HTML files (which are mapped to UTF-8 internally)
527#
528# To turn this into an octet string that really exists on the file
529# system:
530# 1. don't need to do anything special for Unix-based systems
531# (as underlying file system is byte-code)
532# 2. need to map to short DOS filenames for Windows
[721]533
[23362]534sub utf8_to_real_filename
535{
536 my ($utf8_filename) = @_;
[721]537
[23362]538 my $real_filename;
[721]539
[23362]540 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
541 require Win32;
[23388]542
543 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
544
[23362]545 my $unicode_filename = decode("utf8",$utf8_filename);
546 $real_filename = Win32::GetShortPathName($unicode_filename);
547 }
548 else {
549 $real_filename = $utf8_filename;
550 }
551
552 return $real_filename;
553}
554
555
556sub fd_exists
557{
558 my $filename_full_path = shift @_;
559 my $test_op = shift @_ || "-e";
560
561 # By default tests for existance of file or directory (-e)
562 # Can be made more specific by providing second parameter (e.g. -f or -d)
563
564 my $exists = 0;
565
566 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
567 require Win32;
568 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
569 if (!defined $filename_short_path) {
570 # Was probably still in UTF8 form (not what is needed on Windows)
571 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
572 if (defined $unicode_filename_full_path) {
573 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
574 }
575 }
576 $filename_full_path = $filename_short_path;
577 }
578
579 if (defined $filename_full_path) {
580 $exists = eval "($test_op \$filename_full_path)";
581 }
582
583 return $exists;
584}
585
586sub file_exists
587{
588 my ($filename_full_path) = @_;
589
590 return fd_exists($filename_full_path,"-f");
591}
592
593sub dir_exists
594{
595 my ($filename_full_path) = @_;
596
597 return fd_exists($filename_full_path,"-d");
598}
599
600
601
[4]602# updates a copy of a directory in some other part of the filesystem
603# verbosity settings are: 0=low, 1=normal, 2=high
604# both $fromdir and $todir should be absolute paths
605sub cachedir {
606 my ($fromdir, $todir, $verbosity) = @_;
607 $verbosity = 1 unless defined $verbosity;
608
609 # use / for the directory separator, remove duplicate and
610 # trailing slashes
611 $fromdir=~s/[\\\/]+/\//g;
612 $fromdir=~s/[\\\/]+$//;
613 $todir=~s/[\\\/]+/\//g;
614 $todir=~s/[\\\/]+$//;
615
616 &mk_all_dir ($todir);
617
618 # get the directories in ascending order
619 if (!opendir (FROMDIR, $fromdir)) {
620 print STDERR "util::cachedir could not read directory $fromdir\n";
621 return;
622 }
623 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
624 closedir (FROMDIR);
625
626 if (!opendir (TODIR, $todir)) {
627 print STDERR "util::cacedir could not read directory $todir\n";
628 return;
629 }
630 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
631 closedir (TODIR);
632
633 my $fromi = 0;
634 my $toi = 0;
635
636 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
637# print "fromi: $fromi toi: $toi\n";
638
639 # see if we should delete a file/directory
640 # this should happen if the file/directory
641 # is not in the from list or if its a different
642 # size, or has an older timestamp
643 if ($toi < scalar(@todir)) {
644 if (($fromi >= scalar(@fromdir)) ||
645 ($todir[$toi] lt $fromdir[$fromi] ||
646 ($todir[$toi] eq $fromdir[$fromi] &&
647 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
648 $verbosity)))) {
649
650 # the files are different
651 &rm_r("$todir/$todir[$toi]");
652 splice(@todir, $toi, 1); # $toi stays the same
653
654 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
655 # the files are the same
656 # if it is a directory, check its contents
657 if (-d "$todir/$todir[$toi]") {
658 &cachedir ("$fromdir/$fromdir[$fromi]",
659 "$todir/$todir[$toi]", $verbosity);
660 }
661
662 $toi++;
663 $fromi++;
664 next;
665 }
666 }
667
668 # see if we should insert a file/directory
669 # we should insert a file/directory if there
670 # is no tofiles left or if the tofile does not exist
671 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
672 $todir[$toi] gt $fromdir[$fromi])) {
673 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
674 splice (@todir, $toi, 0, $fromdir[$fromi]);
675
676 $toi++;
677 $fromi++;
678 }
679 }
680}
681
682# this function returns -1 if either file is not found
683# assumes that $file1 and $file2 are absolute file names or
684# in the current directory
685# $file2 is allowed to be newer than $file1
686sub differentfiles {
687 my ($file1, $file2, $verbosity) = @_;
688 $verbosity = 1 unless defined $verbosity;
689
690 $file1 =~ s/\/+$//;
691 $file2 =~ s/\/+$//;
692
693 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
694 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
695
696 return -1 unless (-e $file1 && -e $file2);
697 if ($file1name ne $file2name) {
698 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
699 return 1;
700 }
701
[8716]702 my @file1stat = stat ($file1);
703 my @file2stat = stat ($file2);
[4]704
705 if (-d $file1) {
706 if (! -d $file2) {
707 print STDERR "one file is a directory\n" if ($verbosity >= 2);
708 return 1;
709 }
710 return 0;
711 }
712
713 # both must be regular files
714 unless (-f $file1 && -f $file2) {
715 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
716 return 1;
717 }
718
719 # the size of the files must be the same
720 if ($file1stat[7] != $file2stat[7]) {
721 print STDERR "different sized files\n" if ($verbosity >= 2);
722 return 1;
723 }
724
725 # the second file cannot be older than the first
726 if ($file1stat[9] > $file2stat[9]) {
727 print STDERR "file is older\n" if ($verbosity >= 2);
728 return 1;
729 }
730
731 return 0;
732}
733
734
[16266]735sub get_tmp_filename
736{
737 my $file_ext = shift(@_) || undef;
738
[22438]739 my $opt_dot_file_ext = "";
740 if (defined $file_ext) {
741 if ($file_ext !~ m/\./) {
742 # no dot, so needs one added in at start
743 $opt_dot_file_ext = ".$file_ext"
744 }
745 else {
746 # allow for "extensions" such as _metadata.txt to be handled
747 # gracefully
748 $opt_dot_file_ext = $file_ext;
749 }
750 }
[16266]751
[2795]752 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
[4]753 &mk_all_dir ($tmpdir) unless -e $tmpdir;
754
755 my $count = 1000;
756 my $rand = int(rand $count);
[16266]757 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
758
759 while (-e $full_tmp_filename) {
[4]760 $rand = int(rand $count);
[16266]761 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
[4]762 $count++;
763 }
[16266]764
765 return $full_tmp_filename;
[4]766}
767
[22886]768sub get_timestamped_tmp_folder
[22873]769{
770
[22886]771 my $tmp_dirname;
[22873]772 if(defined $ENV{'GSDLCOLLECTDIR'}) {
773 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
774 } elsif(defined $ENV{'GSDLHOME'}) {
775 $tmp_dirname = $ENV{'GSDLHOME'};
[22886]776 } else {
777 return undef;
[22873]778 }
779
780 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
781 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
782
783 # add the timestamp into the path otherwise we can run into problems
784 # if documents have the same name
785 my $timestamp = time;
786 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
787 $tmp_dirname = $time_tmp_dirname;
788 my $i = 1;
789 while (-e $tmp_dirname) {
790 $tmp_dirname = "$time_tmp_dirname$i";
791 $i++;
792 }
793 &util::mk_dir($tmp_dirname);
794
[22886]795 return $tmp_dirname;
796}
[22873]797
[22886]798sub get_timestamped_tmp_filename_in_collection
799{
800
801 my ($input_filename, $output_ext) = @_;
802 # derive tmp filename from input filename
803 my ($tailname, $dirname, $suffix)
804 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
805
806 # softlink to collection tmp dir
807 my $tmp_dirname = &util::get_timestamped_tmp_folder();
808 $tmp_dirname = $dirname unless defined $tmp_dirname;
809
[22873]810 # following two steps copied from ConvertBinaryFile
[22886]811 # do we need them?? can't use them as is, as they use plugin methods.
812
[22873]813 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
814
815 # URLEncode this since htmls with images where the html filename is utf8 don't seem
816 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
817 # files on the filesystem.
818 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
819 if (defined $output_ext) {
820 $output_ext = ".$output_ext"; # add the dot
821 } else {
822 $output_ext = $suffix;
823 }
824 $output_ext= lc($output_ext);
825 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
826
827 return $tmp_filename;
828}
829
[21218]830sub get_toplevel_tmp_dir
831{
832 return filename_cat($ENV{'GSDLHOME'}, "tmp");
833}
834
835
[17512]836sub filename_to_regex {
837 my $filename = shift (@_);
[4]838
[17512]839 # need to put single backslash back to double so that regex works
840 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
[24829]841 $filename =~ s/\\/\\\\/g;
[17512]842 }
[24829]843
844 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
845 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
846 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
847 $filename =~ s@\(@\\(@g; # escape brackets
848 $filename =~ s@\)@\\)@g; # escape brackets
849
[17512]850 return $filename;
851}
852
[24829]853sub unregex_filename {
854 my $filename = shift (@_);
855
856 # need to put doubled backslashes for regex back to single
857 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
858 $filename =~ s/\\\./\./g; # remove RE syntax for .
859 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
860 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
861 return $filename;
862}
863
[4]864sub filename_cat {
[7507]865 my $first_file = shift(@_);
[4]866 my (@filenames) = @_;
[10146]867
[16266]868# Useful for debugging
869# -- might make sense to call caller(0) rather than (1)??
870# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
[22856]871# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
[18913]872
873 # If first_file is not null or empty, then add it back into the list
874 if (defined $first_file && $first_file =~ /\S/) {
[7507]875 unshift(@filenames, $first_file);
876 }
877
[4]878 my $filename = join("/", @filenames);
879
880 # remove duplicate slashes and remove the last slash
[488]881 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
882 $filename =~ s/[\\\/]+/\\/g;
883 } else {
[836]884 $filename =~ s/[\/]+/\//g;
885 # DB: want a filename abc\de.html to remain like this
[488]886 }
887 $filename =~ s/[\\\/]$//;
[4]888
889 return $filename;
890}
891
[21413]892
893sub pathname_cat {
894 my $first_path = shift(@_);
895 my (@pathnames) = @_;
896
897 # If first_path is not null or empty, then add it back into the list
898 if (defined $first_path && $first_path =~ /\S/) {
899 unshift(@pathnames, $first_path);
900 }
901
[21425]902 my $join_char;
[21413]903 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
904 $join_char = ";";
905 } else {
906 $join_char = ":";
907 }
908
909 my $pathname = join($join_char, @pathnames);
910
911 # remove duplicate slashes
912 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
913 $pathname =~ s/[\\\/]+/\\/g;
914 } else {
915 $pathname =~ s/[\/]+/\//g;
916 # DB: want a pathname abc\de.html to remain like this
917 }
918
919 return $pathname;
920}
921
922
[19616]923sub tidy_up_oid {
924 my ($OID) = @_;
925 if ($OID =~ /\./) {
926 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
927 $OID =~ s/\.//g; #remove any periods
928 }
929 if ($OID =~ /^\s.*\s$/) {
930 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
931 # remove starting and trailing whitespace
932 $OID =~ s/^\s+//;
933 $OID =~ s/\s+$//;
934 }
935 if ($OID =~ /^[\d]*$/) {
936 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
937 $OID = "D" . $OID;
938 }
939
940 return $OID;
941}
[10212]942sub envvar_prepend {
943 my ($var,$val) = @_;
944
[16404]945 # do not prepend any value/path that's already in the environment variable
[16442]946 if ($ENV{'GSDLOS'} =~ /^windows$/i)
947 {
[24829]948 my $escaped_val = &filename_to_regex($val); # escape any Windows backslashes and brackets for upcoming regex
[22386]949 if (!defined($ENV{$var})) {
950 $ENV{$var} = "$val";
951 }
952 elsif($ENV{$var} !~ m/$escaped_val/) {
[16442]953 $ENV{$var} = "$val;".$ENV{$var};
[16404]954 }
[16442]955 }
956 else {
[22386]957 if (!defined($ENV{$var})) {
958 $ENV{$var} = "$val";
959 }
960 elsif($ENV{$var} !~ m/$val/) {
[16442]961 $ENV{$var} = "$val:".$ENV{$var};
[16404]962 }
[10212]963 }
964}
965
966sub envvar_append {
967 my ($var,$val) = @_;
968
[16404]969 # do not append any value/path that's already in the environment variable
[16442]970 if ($ENV{'GSDLOS'} =~ /^windows$/i)
971 {
[24829]972 my $escaped_val = &filename_to_regex($val); # escape any Windows backslashes and brackets for upcoming regex
[22386]973 if (!defined($ENV{$var})) {
974 $ENV{$var} = "$val";
975 }
976 elsif($ENV{$var} !~ m/$escaped_val/) {
[16404]977 $ENV{$var} .= ";$val";
978 }
[16442]979 }
980 else {
[22386]981 if (!defined($ENV{$var})) {
982 $ENV{$var} = "$val";
983 }
984 elsif($ENV{$var} !~ m/$val/) {
[16404]985 $ENV{$var} .= ":$val";
986 }
[16442]987 }
[10212]988}
989
[16442]990
[16380]991# splits a filename into a prefix and a tail extension using the tail_re, or
992# if that fails, splits on the file_extension . (dot)
993sub get_prefix_and_tail_by_regex {
[10212]994
[16380]995 my ($filename,$tail_re) = @_;
996
997 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
998 if ((!defined $file_prefix) || (!defined $file_ext)) {
999 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
1000 }
1001
1002 return ($file_prefix,$file_ext);
1003}
1004
1005# get full path and file only path from a base_dir (which may be empty) and
1006# file (which may contain directories)
1007sub get_full_filenames {
1008 my ($base_dir, $file) = @_;
1009
1010 my $filename_full_path = $file;
1011 # add on directory if present
1012 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
1013
1014 my $filename_no_path = $file;
1015
1016 # remove directory if present
1017 $filename_no_path =~ s/^.*[\/\\]//;
1018 return ($filename_full_path, $filename_no_path);
1019}
1020
[8682]1021# returns the path of a file without the filename -- ie. the directory the file is in
1022sub filename_head {
1023 my $filename = shift(@_);
1024
1025 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1026 $filename =~ s/[^\\\\]*$//;
1027 }
1028 else {
1029 $filename =~ s/[^\\\/]*$//;
1030 }
1031
1032 return $filename;
1033}
1034
1035
[23362]1036
[1454]1037# returns 1 if filename1 and filename2 point to the same
1038# file or directory
1039sub filenames_equal {
1040 my ($filename1, $filename2) = @_;
1041
1042 # use filename_cat to clean up trailing slashes and
1043 # multiple slashes
1044 $filename1 = filename_cat ($filename1);
[2516]1045 $filename2 = filename_cat ($filename2);
[1454]1046
1047 # filenames not case sensitive on windows
1048 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1049 $filename1 =~ tr/[A-Z]/[a-z]/;
1050 $filename2 =~ tr/[A-Z]/[a-z]/;
1051 }
1052 return 1 if $filename1 eq $filename2;
1053 return 0;
1054}
1055
[23362]1056
1057sub filename_within_directory
1058{
1059 my ($filename,$within_dir) = @_;
1060
[23371]1061 if ($within_dir !~ m/[\/\\]$/) {
1062 my $dirsep = &util::get_dirsep();
[23362]1063 $within_dir .= $dirsep;
1064 }
1065
[24829]1066 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
[23362]1067 if ($filename =~ m/^$within_dir(.*)$/) {
1068 $filename = $1;
1069 }
1070
1071 return $filename;
1072}
1073
[10281]1074sub filename_within_collection
1075{
1076 my ($filename) = @_;
1077
1078 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1079
1080 if (defined $collect_dir) {
[23362]1081
[15875]1082 # if from within GSDLCOLLECTDIR, then remove directory prefix
1083 # so source_filename is realative to it. This is done to aid
1084 # portability, i.e. the collection can be moved to somewhere
1085 # else on the file system and the archives directory will still
1086 # work. This is needed, for example in the applet version of
1087 # GLI where GSDLHOME/collect on the server will be different to
1088 # the collect directory of the remove user. Of course,
1089 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1090 # it back into a full pathname.
[23362]1091
1092 $filename = filename_within_directory($filename,$collect_dir);
[10281]1093 }
1094
1095 return $filename;
1096}
1097
[23362]1098sub prettyprint_file
1099{
[23484]1100 my ($base_dir,$file,$gli) = @_;
[23362]1101
1102 my $filename_full_path = &util::filename_cat($base_dir,$file);
1103
1104 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1105 require Win32;
1106
1107 # For some reason base_dir in the form c:/a/b/c
1108 # This leads to confusion later on, so turn it back into
1109 # the more usual Windows form
1110 $base_dir =~ s/\//\\/g;
1111 my $long_base_dir = Win32::GetLongPathName($base_dir);
1112 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1113
1114 $file = filename_within_directory($long_full_path,$long_base_dir);
[23484]1115 $file = encode("utf8",$file) if ($gli);
[23362]1116 }
1117
1118 return $file;
1119}
1120
1121
1122sub upgrade_if_dos_filename
1123{
[23371]1124 my ($filename_full_path,$and_encode) = @_;
[23362]1125
1126 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1127 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1128 # to its long (Windows) version
[23416]1129 my $long_filename = Win32::GetLongPathName($filename_full_path);
1130 if (defined $long_filename) {
1131 $filename_full_path = $long_filename;
1132 }
[23362]1133 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
[23483]1134 $filename_full_path =~ s/^(.):/\u$1:/;
[23371]1135 if ((defined $and_encode) && ($and_encode)) {
1136 $filename_full_path = encode("utf8",$filename_full_path);
1137 }
[23362]1138 }
1139
1140 return $filename_full_path;
1141}
1142
1143
[23388]1144sub downgrade_if_dos_filename
1145{
1146 my ($filename_full_path) = @_;
1147
1148 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1149 require Win32;
1150
1151 # Ensure the given long Windows filename is in a form that can
1152 # be opened by Perl => convert it to a short DOS-like filename
1153
[23414]1154 my $short_filename = Win32::GetShortPathName($filename_full_path);
1155 if (defined $short_filename) {
1156 $filename_full_path = $short_filename;
1157 }
[23416]1158 # Make sure initial drive letter is lower-case (to fit in
1159 # with rest of Greenstone)
[23483]1160 $filename_full_path =~ s/^(.):/\u$1:/;
[23388]1161 }
1162
1163 return $filename_full_path;
1164}
1165
[23561]1166sub block_filename
1167{
1168 my ($block_hash,$filename) = @_;
1169
1170 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1171
1172 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1173 my $lower_filename = lc($filename);
1174 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1175# my $lower_drive = $filename;
1176# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1177
1178# my $upper_drive = $filename;
1179# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1180#
1181# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1182# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
1183 }
1184 else {
1185 $block_hash->{'file_blocks'}->{$filename} = 1;
1186 }
1187}
[23388]1188
[23561]1189
[18441]1190sub filename_is_absolute
1191{
1192 my ($filename) = @_;
1193
1194 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1195 return ($filename =~ m/^(\w:)?\\/);
1196 }
1197 else {
1198 return ($filename =~ m/^\//);
1199 }
1200}
1201
1202
[17572]1203## @method make_absolute()
1204#
1205# Ensure the given file path is absolute in respect to the given base path.
1206#
1207# @param $base_dir A string denoting the base path the given dir must be
1208# absolute to.
1209# @param $dir The directory to be made absolute as a string. Note that the
1210# dir may already be absolute, in which case it will remain
1211# unchanged.
1212# @return The now absolute form of the directory as a string.
1213#
1214# @author John Thompson, DL Consulting Ltd.
1215# @copy 2006 DL Consulting Ltd.
1216#
1217#used in buildcol.pl, doesn't work for all cases --kjdon
1218sub make_absolute {
1219
1220 my ($base_dir, $dir) = @_;
[18441]1221### print STDERR "dir = $dir\n";
[17572]1222 $dir =~ s/[\\\/]+/\//g;
1223 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1224 $dir =~ s|^/tmp_mnt||;
1225 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1226 $dir =~ s|/[.][.]?/|/|g;
1227 $dir =~ tr|/|/|s;
[18441]1228### print STDERR "dir = $dir\n";
[17572]1229
1230 return $dir;
1231}
1232## make_absolute() ##
[10281]1233
[7929]1234sub get_dirsep {
1235
1236 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1237 return "\\";
1238 } else {
1239 return "\/";
1240 }
1241}
1242
[619]1243sub get_os_dirsep {
[4]1244
[619]1245 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1246 return "\\\\";
1247 } else {
1248 return "\\\/";
1249 }
1250}
1251
1252sub get_re_dirsep {
1253
1254 return "\\\\|\\\/";
1255}
1256
1257
[15003]1258sub get_dirsep_tail {
1259 my ($filename) = @_;
1260
1261 # returns last part of directory or filename
1262 # On unix e.g. a/b.d => b.d
1263 # a/b/c => c
1264
[15088]1265 my $dirsep = get_re_dirsep();
1266 my @dirs = split (/$dirsep/, $filename);
1267 my $tail = pop @dirs;
[15003]1268
[15088]1269 # - caused problems under windows
1270 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1271
[15003]1272 return $tail;
1273}
1274
1275
[4]1276# if this is running on windows we want binaries to end in
1277# .exe, otherwise they don't have to end in any extension
1278sub get_os_exe {
1279 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1280 return "";
1281}
1282
1283
[86]1284# test to see whether this is a big or little endian machine
[15713]1285sub is_little_endian
1286{
1287 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1288 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1289 # Otherwise, it's little endian
1290
1291 #return 0 if $^O =~ /^darwin$/i;
[17714]1292 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1293
1294 # Going back to stating exactly whether the machine is little endian
1295 # or big endian, without any special case for Macs. Since for rata it comes
1296 # back with little endian and for shuttle with bigendian.
[15713]1297 return (ord(substr(pack("s",1), 0, 1)) == 1);
[86]1298}
[4]1299
[86]1300
[135]1301# will return the collection name if successful, "" otherwise
1302sub use_collection {
[1454]1303 my ($collection, $collectdir) = @_;
[135]1304
[1454]1305 if (!defined $collectdir || $collectdir eq "") {
1306 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1307 }
1308
[135]1309 # get and check the collection
1310 if (!defined($collection) || $collection eq "") {
1311 if (defined $ENV{'GSDLCOLLECTION'}) {
1312 $collection = $ENV{'GSDLCOLLECTION'};
1313 } else {
[2359]1314 print STDOUT "No collection specified\n";
[135]1315 return "";
1316 }
1317 }
1318
1319 if ($collection eq "modelcol") {
[2359]1320 print STDOUT "You can't use modelcol.\n";
[135]1321 return "";
1322 }
1323
1324 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1325 # are defined
[17204]1326 $ENV{'GSDLCOLLECTION'} = $collection;
[1454]1327 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
[135]1328
1329 # make sure this collection exists
1330 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
[2359]1331 print STDOUT "Invalid collection ($collection).\n";
[135]1332 return "";
1333 }
1334
1335 # everything is ready to go
1336 return $collection;
1337}
1338
[21207]1339sub get_current_collection_name {
1340 return $ENV{'GSDLCOLLECTION'};
1341}
[14926]1342
1343
1344# will return the collection name if successful, "" otherwise.
1345# Like use_collection (above) but for greenstone 3 (taking account of site level)
1346
1347sub use_site_collection {
1348 my ($site, $collection, $collectdir) = @_;
1349
1350 if (!defined $collectdir || $collectdir eq "") {
1351 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1352 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1353 }
1354
1355 # collectdir explicitly set by this point (using $site variable if required).
1356 # Can call "old" gsdl2 use_collection now.
1357
1358 return use_collection($collection,$collectdir);
1359}
1360
1361
1362
[15018]1363sub locate_config_file
1364{
1365 my ($file) = @_;
1366
1367 my $locations = locate_config_files($file);
1368
1369 return shift @$locations; # returns undef if 'locations' is empty
1370}
1371
1372
1373sub locate_config_files
1374{
1375 my ($file) = @_;
1376
1377 my @locations = ();
1378
1379 if (-e $file) {
1380 # Clearly specified (most likely full filename)
1381 # No need to hunt in 'etc' directories, return value unchanged
1382 push(@locations,$file);
1383 }
1384 else {
1385 # Check for collection specific one before looking in global GSDL 'etc'
[16969]1386 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1387 my $test_collect_etc_filename
1388 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1389
1390 if (-e $test_collect_etc_filename) {
1391 push(@locations,$test_collect_etc_filename);
1392 }
[15018]1393 }
1394 my $test_main_etc_filename
1395 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1396 if (-e $test_main_etc_filename) {
1397 push(@locations,$test_main_etc_filename);
1398 }
1399 }
1400
1401 return \@locations;
1402}
1403
1404
[9955]1405sub hyperlink_text
1406{
1407 my ($text) = @_;
1408
1409 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1410 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1411
1412 return $text;
1413}
1414
1415
[16436]1416# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1417# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1418sub is_dir_empty
1419{
1420 my ($path) = @_;
1421 opendir DIR, $path;
1422 while(my $entry = readdir DIR) {
1423 next if($entry =~ /^\.\.?$/);
1424 closedir DIR;
1425 return 0;
1426 }
1427 closedir DIR;
1428 return 1;
1429}
1430
[18337]1431# Returns the given filename converted using either URL encoding or base64
1432# encoding, as specified by $rename_method. If the given filename has no suffix
[20413]1433# (if it is just the tailname), then $no_suffix should be some defined value.
1434# rename_method can be url, none, base64
[18319]1435sub rename_file {
[18337]1436 my ($filename, $rename_method, $no_suffix) = @_;
[18329]1437
[18337]1438 if(!$filename) { # undefined or empty string
[18329]1439 return $filename;
1440 }
[18319]1441
[20413]1442 if (!$rename_method) {
1443 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1444 # Debugging information
[22856]1445 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1446 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
[20413]1447 $rename_method = "url";
1448 } elsif($rename_method eq "none") {
1449 return $filename; # would have already been renamed
1450 }
1451
[19762]1452 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1453 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1454 ###$filename =~ s/ /_/g;
[18337]1455
1456 my ($tailname,$dirname,$suffix);
1457 if($no_suffix) { # given a tailname, no suffix
1458 ($tailname,$dirname) = File::Basename::fileparse($filename);
1459 }
1460 else {
1461 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1462 }
[23388]1463 if (!$suffix) {
1464 $suffix = "";
1465 }
1466 else {
1467 $suffix = lc($suffix);
1468 }
[18337]1469
[20413]1470 if ($rename_method eq "url") {
[18319]1471 $tailname = &unicode::url_encode($tailname);
1472 }
1473 elsif ($rename_method eq "base64") {
[18341]1474 $tailname = &unicode::base64_encode($tailname);
[18319]1475 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1476 }
[18326]1477
[18319]1478 $filename = "$tailname$suffix";
[18326]1479 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
[18319]1480
1481 return $filename;
1482}
1483
[21616]1484
1485# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
[21664]1486sub rename_ldb_or_bdb_file {
[18657]1487 my ($filename_no_ext) = @_;
1488
1489 my $new_filename = "$filename_no_ext.gdb";
[21615]1490 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
[18657]1491 # try ldb
1492 my $old_filename = "$filename_no_ext.ldb";
1493
1494 if (-f $old_filename) {
[19056]1495 print STDERR "Renaming $old_filename to $new_filename\n";
1496 rename ($old_filename, $new_filename)
1497 || print STDERR "Rename failed: $!\n";
[18657]1498 return;
1499 }
1500 # try bdb
1501 $old_filename = "$filename_no_ext.bdb";
1502 if (-f $old_filename) {
[19056]1503 print STDERR "Renaming $old_filename to $new_filename\n";
1504 rename ($old_filename, $new_filename)
1505 || print STDERR "Rename failed: $!\n";
[18657]1506 return;
1507 }
1508}
1509
1510
[21719]1511# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1512# By default, /greenstone3 for GS3 or /greenstone for GS2.
1513sub get_greenstone_url_prefix() {
1514 # if already set on a previous occasion, just return that
1515 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1516 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
[18657]1517
[21719]1518 my ($configfile, $urlprefix, $defaultUrlprefix);
1519 my @propertynames = ();
1520
1521 if($ENV{'GSDL3SRCHOME'}) {
1522 $defaultUrlprefix = "/greenstone3";
1523 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1524 push(@propertynames, qw/path\s*\=/);
1525 } else {
1526 $defaultUrlprefix = "/greenstone";
1527 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1528 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1529 }
1530
1531 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1532
1533 if(!$urlprefix) { # no values found for URL prefix, use default values
1534 $urlprefix = $defaultUrlprefix;
1535 } else {
1536 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1537 $urlprefix =~ s/^\///; # remove the starting slash
1538 my @dirs = split(/(\\|\/)/, $urlprefix);
1539 $urlprefix = shift(@dirs);
1540
1541 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1542 $urlprefix = "/$urlprefix";
1543 }
1544 }
1545
1546 # set for the future
1547 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1548# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1549 return $urlprefix;
1550}
1551
1552
1553# Given a config file (xml or java properties file) and a list/array of regular expressions
1554# that represent property names to match on, this function will return the value for the 1st
1555# matching property name. If the return value is undefined, no matching property was found.
1556sub extract_propvalue_from_file() {
1557 my ($configfile, $propertynames) = @_;
1558
1559 my $value;
1560 unless(open(FIN, "<$configfile")) {
1561 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1562 return $value; # not initialised
1563 }
1564
1565 # Read the entire file at once, as one single line, then close it
1566 my $filecontents;
1567 {
1568 local $/ = undef;
1569 $filecontents = <FIN>;
1570 }
1571 close(FIN);
1572
1573 foreach my $regex (@$propertynames) {
1574 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1575 if($value) {
1576 $value =~ s/^\"//; # remove any startquotes
1577 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1578 last; # found value for a matching property, break from loop
1579 }
1580 }
1581
1582 return $value;
1583}
1584
[23306]1585# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1586# given that perllib is in @INC in order to invoke this subroutine.
1587# Call as follows -- after setting up INC to include perllib and
1588# after setting up GSDLHOME and GSDLOS:
1589#
1590# require util;
1591# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1592#
1593sub setup_greenstone_env() {
1594 my ($GSDLHOME, $GSDLOS) = @_;
1595
1596 #my %env_map = ();
1597 # Get the localised ENV settings of running a localised source setup.bash
[23314]1598 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1599 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1600 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
[23306]1601 if($GSDLOS =~ m/windows/i) {
[23314]1602 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1603 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
[23306]1604 }
1605 if (!open(PIN, "$perl_command |")) {
1606 print STDERR ("Unable to execute command: $perl_command. $!\n");
[24563]1607 }
[23306]1608
1609 while (defined (my $perl_output_line = <PIN>)) {
1610 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1611 #$env_map{$key}=$value;
1612 $ENV{$key}=$value;
1613 }
[24563]1614 close (PIN);
1615
[23306]1616 # If any keys in $ENV don't occur in Greenstone's localised env
1617 # (stored in $env_map), delete those entries from $ENV
1618 #foreach $key (keys %ENV) {
1619 # if(!defined $env_map{$key}) {
1620 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1621 # delete $ENV{$key}; # del $ENV(key, value) pair
1622 # }
1623 #}
1624 #undef %env_map;
1625}
1626
[24362]1627sub get_perl_exec() {
1628 my $perl_exec = $^X; # may return just "perl"
1629
1630 if($ENV{'PERLPATH'}) {
1631 # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1632 if($ENV{'GSDLOS'} =~ m/windows/) {
1633 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1634 } else {
1635 $perl_exec = "$ENV{'PERLPATH'}/perl";
1636 }
1637 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1638 # containing the full path to the current perl executable we're using
1639 $perl_exec = $Config{perlpath}; # configured path for perl
1640 if (!-e $perl_exec) { # may not point to location on this machine
1641 $perl_exec = $^X; # may return just "perl"
1642 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1643 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1644 }
1645 }
1646 }
1647
1648 return $perl_exec;
1649}
1650
1651
[4]16521;
Note: See TracBrowser for help on using the repository browser.