source: main/trunk/greenstone2/perllib/util.pm@ 25577

Last change on this file since 25577 was 25577, checked in by ak19, 12 years ago

Moved some more general methods from activate.pl to util.pm and in the recently added sub mv_dir_contents, use rmdir to remove empty dirs rather than using rm_r.

  • Property svn:keywords set to Author Date Id Revision
File size: 51.5 KB
RevLine 
[537]1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package util;
27
[23362]28use strict;
29
30use Encode;
[4]31use File::Copy;
[619]32use File::Basename;
[24362]33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
[4]38
39# removes files (but not directories)
40sub rm {
41 my (@files) = @_;
[18469]42
[4]43 my @filefiles = ();
44
45 # make sure the files we want to delete exist
46 # and are regular files
[10046]47 foreach my $file (@files) {
[4]48 if (!-e $file) {
49 print STDERR "util::rm $file does not exist\n";
[721]50 } elsif ((!-f $file) && (!-l $file)) {
51 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
[4]52 } else {
53 push (@filefiles, $file);
54 }
55 }
56
57 # remove the files
58 my $numremoved = unlink @filefiles;
59
60 # check to make sure all of them were removed
61 if ($numremoved != scalar(@filefiles)) {
62 print STDERR "util::rm Not all files were removed\n";
63 }
64}
65
[23249]66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine when debugging the deletion of individual files.
68sub rm_debug {
69 my (@files) = @_;
70 my @filefiles = ();
[4]71
[23249]72 # make sure the files we want to delete exist
73 # and are regular files
74 foreach my $file (@files) {
75 if (!-e $file) {
76 print STDERR "util::rm $file does not exist\n";
77 } elsif ((!-f $file) && (!-l $file)) {
78 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79 } else { # debug message
80 unlink($file) or warn "Could not delete file $file: $!\n";
81 }
82 }
83}
[10211]84
[23249]85
[4]86# recursive removal
[10211]87sub filtered_rm_r {
88 my ($files,$file_accept_re,$file_reject_re) = @_;
[4]89
[24291]90# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92# print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93
[10211]94 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
[4]96 # recursively remove the files
[10211]97 foreach my $file (@files_array) {
[4]98 $file =~ s/[\/\\]+$//; # remove trailing slashes
99
100 if (!-e $file) {
[10211]101 print STDERR "util::filtered_rm_r $file does not exist\n";
[4]102
[721]103 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
[4]104 # get the contents of this directory
105 if (!opendir (INDIR, $file)) {
[10211]106 print STDERR "util::filtered_rm_r could not open directory $file\n";
[4]107 } else {
108 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109 closedir (INDIR);
[10211]110
[4]111 # remove all the files in this directory
[10211]112 map {$_="$file/$_";} @filedir;
113 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
[4]114
[10211]115 if (!defined $file_accept_re && !defined $file_reject_re) {
116 # remove this directory
117 if (!rmdir $file) {
118 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119 }
[4]120 }
121 }
[10211]122 } else {
123 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
[4]124
[10211]125 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126 # remove this file
127 &rm ($file);
128 }
[4]129 }
130 }
131}
132
[10211]133
134# recursive removal
135sub rm_r {
136 my (@files) = @_;
137
138 # use the more general (but reterospectively written function
139 # filtered_rm_r function()
140
141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
[721]147# moves a file or a group of files
148sub mv {
149 my $dest = pop (@_);
150 my (@srcfiles) = @_;
[4]151
[721]152 # remove trailing slashes from source and destination files
153 $dest =~ s/[\\\/]+$//;
154 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156 # a few sanity checks
157 if (scalar (@srcfiles) == 0) {
158 print STDERR "util::mv no destination directory given\n";
159 return;
160 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161 print STDERR "util::mv if multiple source files are given the ".
162 "destination must be a directory\n";
163 return;
164 }
165
166 # move the files
[8716]167 foreach my $file (@srcfiles) {
[721]168 my $tempdest = $dest;
169 if (-d $tempdest) {
170 my ($filename) = $file =~ /([^\\\/]+)$/;
171 $tempdest .= "/$filename";
172 }
173 if (!-e $file) {
174 print STDERR "util::mv $file does not exist\n";
175 } else {
176 rename ($file, $tempdest);
177 }
178 }
179}
180
[25554]181# Move the contents of source directory into target directory
182# (as opposed to merely replacing target dir with the src dir)
183# This can overwrite any files with duplicate names in the target
184# but other files and folders in the target will continue to exist
185sub mv_dir_contents {
186 my ($src_dir, $dest_dir) = @_;
187
188 # Obtain listing of all files within src_dir
189 # Note that readdir lists relative paths, as well as . and ..
190 opendir(DIR, "$src_dir");
191 my @files= readdir(DIR);
192 close(DIR);
193
194 # construct absolute paths
[25572]195 my @full_path_files = ();
[25554]196 foreach my $file (@files) {
197 # process all except . and ..
[25572]198 unless($file eq "." || $file eq "..") {
199 my $tailname = $file;
200 $file = &filename_cat($src_dir, $file);
201
202 if(-d $file) { # recursion on directories
203 #print STDERR "**** $file is a directory, to be copied to $dest_dir\\$tailname\n";
204 &mv_dir_contents($file, &filename_cat($dest_dir, $tailname));
205
206 # now all content is moved across, delete empty dir in source folder
207 if(&is_dir_empty($file)) {
[25577]208 if (!rmdir $file) {
209 print STDERR "util::mv_dir_contents couldn't remove directory $file\n";
210 }
[25572]211 } else { # error
212 print STDERR "ERROR. util::mv_dir_contents: subfolder $file still non-empty after moving contents to $dest_dir\n";
213 }
214 } else { # process files with a simple move
215 push(@full_path_files, $file);
216 }
217 }
[25554]218 }
[25572]219
220 if(!&dir_exists($dest_dir)) { # create target toplevel folder or subfolders if they don't exist
221 &mk_dir($dest_dir);
222 }
223
224 #print STDERR "@@@@@ Copying files to: $dest_dir\n";
225 if(@full_path_files) { # if non-empty, there's something to copy across
226 &mv(@full_path_files, $dest_dir);
227 }
[25554]228}
[721]229
[25554]230
[4]231# copies a file or a group of files
232sub cp {
233 my $dest = pop (@_);
234 my (@srcfiles) = @_;
235
236 # remove trailing slashes from source and destination files
237 $dest =~ s/[\\\/]+$//;
238 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
239
240 # a few sanity checks
241 if (scalar (@srcfiles) == 0) {
242 print STDERR "util::cp no destination directory given\n";
243 return;
244 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
245 print STDERR "util::cp if multiple source files are given the ".
246 "destination must be a directory\n";
247 return;
248 }
249
250 # copy the files
[8716]251 foreach my $file (@srcfiles) {
[4]252 my $tempdest = $dest;
253 if (-d $tempdest) {
254 my ($filename) = $file =~ /([^\\\/]+)$/;
255 $tempdest .= "/$filename";
256 }
257 if (!-e $file) {
258 print STDERR "util::cp $file does not exist\n";
259 } elsif (!-f $file) {
260 print STDERR "util::cp $file is not a plain file\n";
261 } else {
262 &File::Copy::copy ($file, $tempdest);
263 }
264 }
265}
266
267
[721]268
[4]269# recursively copies a file or group of files
[1454]270# syntax: cp_r (sourcefiles, destination directory)
271# destination must be a directory - to copy one file to
272# another use cp instead
[4]273sub cp_r {
274 my $dest = pop (@_);
275 my (@srcfiles) = @_;
276
277 # a few sanity checks
278 if (scalar (@srcfiles) == 0) {
[1454]279 print STDERR "util::cp_r no destination directory given\n";
[4]280 return;
[1454]281 } elsif (-f $dest) {
282 print STDERR "util::cp_r destination must be a directory\n";
[4]283 return;
284 }
285
[1454]286 # create destination directory if it doesn't exist already
287 if (! -d $dest) {
288 my $store_umask = umask(0002);
289 mkdir ($dest, 0777);
290 umask($store_umask);
291 }
292
[4]293 # copy the files
[8716]294 foreach my $file (@srcfiles) {
[4]295
296 if (!-e $file) {
[1454]297 print STDERR "util::cp_r $file does not exist\n";
[4]298
299 } elsif (-d $file) {
[1586]300 # make the new directory
301 my ($filename) = $file =~ /([^\\\/]*)$/;
302 $dest = &util::filename_cat ($dest, $filename);
303 my $store_umask = umask(0002);
304 mkdir ($dest, 0777);
305 umask($store_umask);
[836]306
[4]307 # get the contents of this directory
308 if (!opendir (INDIR, $file)) {
309 print STDERR "util::cp_r could not open directory $file\n";
310 } else {
[1454]311 my @filedir = readdir (INDIR);
[4]312 closedir (INDIR);
[8716]313 foreach my $f (@filedir) {
[1454]314 next if $f =~ /^\.\.?$/;
315 # copy all the files in this directory
316 my $ff = &util::filename_cat ($file, $f);
317 &cp_r ($ff, $dest);
318 }
[4]319 }
320
321 } else {
[1454]322 &cp($file, $dest);
[4]323 }
324 }
325}
[21762]326# recursively copies a file or group of files
327# syntax: cp_r (sourcefiles, destination directory)
328# destination must be a directory - to copy one file to
329# another use cp instead
330sub cp_r_nosvn {
331 my $dest = pop (@_);
332 my (@srcfiles) = @_;
[4]333
[21762]334 # a few sanity checks
335 if (scalar (@srcfiles) == 0) {
336 print STDERR "util::cp_r no destination directory given\n";
337 return;
338 } elsif (-f $dest) {
339 print STDERR "util::cp_r destination must be a directory\n";
340 return;
341 }
342
343 # create destination directory if it doesn't exist already
344 if (! -d $dest) {
345 my $store_umask = umask(0002);
346 mkdir ($dest, 0777);
347 umask($store_umask);
348 }
349
350 # copy the files
351 foreach my $file (@srcfiles) {
352
353 if (!-e $file) {
354 print STDERR "util::cp_r $file does not exist\n";
355
356 } elsif (-d $file) {
357 # make the new directory
358 my ($filename) = $file =~ /([^\\\/]*)$/;
359 $dest = &util::filename_cat ($dest, $filename);
360 my $store_umask = umask(0002);
361 mkdir ($dest, 0777);
362 umask($store_umask);
363
364 # get the contents of this directory
365 if (!opendir (INDIR, $file)) {
366 print STDERR "util::cp_r could not open directory $file\n";
367 } else {
368 my @filedir = readdir (INDIR);
369 closedir (INDIR);
370 foreach my $f (@filedir) {
371 next if $f =~ /^\.\.?$/;
372 next if $f =~ /^\.svn$/;
373 # copy all the files in this directory
374 my $ff = &util::filename_cat ($file, $f);
375 &cp_r ($ff, $dest);
376 }
377 }
378
379 } else {
380 &cp($file, $dest);
381 }
382 }
383}
384
[11179]385# copies a directory and its contents, excluding subdirectories, into a new directory
386sub cp_r_toplevel {
387 my $dest = pop (@_);
388 my (@srcfiles) = @_;
[4]389
[11179]390 # a few sanity checks
391 if (scalar (@srcfiles) == 0) {
392 print STDERR "util::cp_r no destination directory given\n";
393 return;
394 } elsif (-f $dest) {
395 print STDERR "util::cp_r destination must be a directory\n";
396 return;
397 }
398
399 # create destination directory if it doesn't exist already
400 if (! -d $dest) {
401 my $store_umask = umask(0002);
402 mkdir ($dest, 0777);
403 umask($store_umask);
404 }
405
406 # copy the files
407 foreach my $file (@srcfiles) {
408
409 if (!-e $file) {
410 print STDERR "util::cp_r $file does not exist\n";
411
412 } elsif (-d $file) {
413 # make the new directory
414 my ($filename) = $file =~ /([^\\\/]*)$/;
415 $dest = &util::filename_cat ($dest, $filename);
416 my $store_umask = umask(0002);
417 mkdir ($dest, 0777);
418 umask($store_umask);
419
420 # get the contents of this directory
421 if (!opendir (INDIR, $file)) {
422 print STDERR "util::cp_r could not open directory $file\n";
423 } else {
424 my @filedir = readdir (INDIR);
425 closedir (INDIR);
426 foreach my $f (@filedir) {
427 next if $f =~ /^\.\.?$/;
428
429 # copy all the files in this directory, but not directories
430 my $ff = &util::filename_cat ($file, $f);
431 if (-f $ff) {
432 &cp($ff, $dest);
433 #&cp_r ($ff, $dest);
434 }
435 }
436 }
437
438 } else {
439 &cp($file, $dest);
440 }
441 }
442}
443
[721]444sub mk_dir {
445 my ($dir) = @_;
446
[836]447 my $store_umask = umask(0002);
448 my $mkdir_ok = mkdir ($dir, 0777);
449 umask($store_umask);
450
451 if (!$mkdir_ok)
452 {
[721]453 print STDERR "util::mk_dir could not create directory $dir\n";
454 return;
455 }
456}
457
[1046]458# in case anyone cares - I did some testing (using perls Benchmark module)
459# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
460# slightly faster (surprisingly) - Stefan.
[4]461sub mk_all_dir {
462 my ($dir) = @_;
463
464 # use / for the directory separator, remove duplicate and
465 # trailing slashes
466 $dir=~s/[\\\/]+/\//g;
467 $dir=~s/[\\\/]+$//;
468
469 # make sure the cache directory exists
470 my $dirsofar = "";
471 my $first = 1;
[8716]472 foreach my $dirname (split ("/", $dir)) {
[4]473 $dirsofar .= "/" unless $first;
474 $first = 0;
475
476 $dirsofar .= $dirname;
477
478 next if $dirname =~ /^(|[a-z]:)$/i;
[836]479 if (!-e $dirsofar)
480 {
481 my $store_umask = umask(0002);
482 my $mkdir_ok = mkdir ($dirsofar, 0777);
483 umask($store_umask);
484 if (!$mkdir_ok)
485 {
486 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
487 return;
488 }
489 }
[4]490 }
491}
492
[619]493# make hard link to file if supported by OS, otherwise copy the file
494sub hard_link {
[18463]495 my ($src, $dest, $verbosity) = @_;
[4]496
[619]497 # remove trailing slashes from source and destination files
498 $src =~ s/[\\\/]+$//;
499 $dest =~ s/[\\\/]+$//;
500
[23307]501## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
[619]502 # a few sanity checks
[812]503 if (-e $dest) {
504 # destination file already exists
505 return;
506 }
507 elsif (!-e $src) {
[23307]508 print STDERR "util::hard_link source file \"$src\" does not exist\n";
[3628]509 return 1;
[619]510 }
511 elsif (-d $src) {
[23307]512 print STDERR "util::hard_link source \"$src\" is a directory\n";
[3628]513 return 1;
[619]514 }
515
516 my $dest_dir = &File::Basename::dirname($dest);
517 mk_all_dir($dest_dir) if (!-e $dest_dir);
518
[14365]519
[22119]520 if (!link($src, $dest)) {
[18463]521 if ((!defined $verbosity) || ($verbosity>2)) {
522 print STDERR "util::hard_link: unable to create hard link. ";
523 print STDERR " Copying file: $src -> $dest\n";
524 }
[14365]525 &File::Copy::copy ($src, $dest);
[619]526 }
[3628]527 return 0;
[619]528}
529
[2193]530# make soft link to file if supported by OS, otherwise copy file
[721]531sub soft_link {
[15165]532 my ($src, $dest, $ensure_paths_absolute) = @_;
[619]533
[721]534 # remove trailing slashes from source and destination files
535 $src =~ s/[\\\/]+$//;
536 $dest =~ s/[\\\/]+$//;
[619]537
[15165]538 # Ensure file paths are absolute IF requested to do so
539 # Soft_linking didn't work for relative paths
540 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
541 # We need to ensure that the src file is the absolute path
542 # See http://perldoc.perl.org/File/Spec.html
543 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
544 $src = File::Spec->rel2abs($src); # make absolute
545 }
546 # Might as well ensure that the destination file's absolute path is used
547 if(!File::Spec->file_name_is_absolute( $dest )) {
548 $dest = File::Spec->rel2abs($dest); # make absolute
549 }
550 }
551
[721]552 # a few sanity checks
553 if (!-e $src) {
554 print STDERR "util::soft_link source file $src does not exist\n";
555 return 0;
556 }
[619]557
[721]558 my $dest_dir = &File::Basename::dirname($dest);
559 mk_all_dir($dest_dir) if (!-e $dest_dir);
[14365]560
[2193]561 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
[23484]562
[14365]563 # symlink not supported on windows
564 &File::Copy::copy ($src, $dest);
[2193]565
566 } elsif (!eval {symlink($src, $dest)}) {
[2974]567 print STDERR "util::soft_link: unable to create soft link.\n";
[721]568 return 0;
569 }
570
571 return 1;
572}
573
[23362]574# Primarily for filenames generated by processing
575# content of HTML files (which are mapped to UTF-8 internally)
576#
577# To turn this into an octet string that really exists on the file
578# system:
579# 1. don't need to do anything special for Unix-based systems
580# (as underlying file system is byte-code)
581# 2. need to map to short DOS filenames for Windows
[721]582
[23362]583sub utf8_to_real_filename
584{
585 my ($utf8_filename) = @_;
[721]586
[23362]587 my $real_filename;
[721]588
[23362]589 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
590 require Win32;
[23388]591
592 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
593
[23362]594 my $unicode_filename = decode("utf8",$utf8_filename);
595 $real_filename = Win32::GetShortPathName($unicode_filename);
596 }
597 else {
598 $real_filename = $utf8_filename;
599 }
600
601 return $real_filename;
602}
603
604
605sub fd_exists
606{
607 my $filename_full_path = shift @_;
608 my $test_op = shift @_ || "-e";
609
610 # By default tests for existance of file or directory (-e)
611 # Can be made more specific by providing second parameter (e.g. -f or -d)
612
613 my $exists = 0;
614
615 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
616 require Win32;
617 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
618 if (!defined $filename_short_path) {
619 # Was probably still in UTF8 form (not what is needed on Windows)
620 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
621 if (defined $unicode_filename_full_path) {
622 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
623 }
624 }
625 $filename_full_path = $filename_short_path;
626 }
627
628 if (defined $filename_full_path) {
629 $exists = eval "($test_op \$filename_full_path)";
630 }
631
632 return $exists;
633}
634
635sub file_exists
636{
637 my ($filename_full_path) = @_;
638
639 return fd_exists($filename_full_path,"-f");
640}
641
642sub dir_exists
643{
644 my ($filename_full_path) = @_;
645
646 return fd_exists($filename_full_path,"-d");
647}
648
649
650
[4]651# updates a copy of a directory in some other part of the filesystem
652# verbosity settings are: 0=low, 1=normal, 2=high
653# both $fromdir and $todir should be absolute paths
654sub cachedir {
655 my ($fromdir, $todir, $verbosity) = @_;
656 $verbosity = 1 unless defined $verbosity;
657
658 # use / for the directory separator, remove duplicate and
659 # trailing slashes
660 $fromdir=~s/[\\\/]+/\//g;
661 $fromdir=~s/[\\\/]+$//;
662 $todir=~s/[\\\/]+/\//g;
663 $todir=~s/[\\\/]+$//;
664
665 &mk_all_dir ($todir);
666
667 # get the directories in ascending order
668 if (!opendir (FROMDIR, $fromdir)) {
669 print STDERR "util::cachedir could not read directory $fromdir\n";
670 return;
671 }
672 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
673 closedir (FROMDIR);
674
675 if (!opendir (TODIR, $todir)) {
676 print STDERR "util::cacedir could not read directory $todir\n";
677 return;
678 }
679 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
680 closedir (TODIR);
681
682 my $fromi = 0;
683 my $toi = 0;
684
685 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
686# print "fromi: $fromi toi: $toi\n";
687
688 # see if we should delete a file/directory
689 # this should happen if the file/directory
690 # is not in the from list or if its a different
691 # size, or has an older timestamp
692 if ($toi < scalar(@todir)) {
693 if (($fromi >= scalar(@fromdir)) ||
694 ($todir[$toi] lt $fromdir[$fromi] ||
695 ($todir[$toi] eq $fromdir[$fromi] &&
696 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
697 $verbosity)))) {
698
699 # the files are different
700 &rm_r("$todir/$todir[$toi]");
701 splice(@todir, $toi, 1); # $toi stays the same
702
703 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
704 # the files are the same
705 # if it is a directory, check its contents
706 if (-d "$todir/$todir[$toi]") {
707 &cachedir ("$fromdir/$fromdir[$fromi]",
708 "$todir/$todir[$toi]", $verbosity);
709 }
710
711 $toi++;
712 $fromi++;
713 next;
714 }
715 }
716
717 # see if we should insert a file/directory
718 # we should insert a file/directory if there
719 # is no tofiles left or if the tofile does not exist
720 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
721 $todir[$toi] gt $fromdir[$fromi])) {
722 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
723 splice (@todir, $toi, 0, $fromdir[$fromi]);
724
725 $toi++;
726 $fromi++;
727 }
728 }
729}
730
731# this function returns -1 if either file is not found
732# assumes that $file1 and $file2 are absolute file names or
733# in the current directory
734# $file2 is allowed to be newer than $file1
735sub differentfiles {
736 my ($file1, $file2, $verbosity) = @_;
737 $verbosity = 1 unless defined $verbosity;
738
739 $file1 =~ s/\/+$//;
740 $file2 =~ s/\/+$//;
741
742 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
743 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
744
745 return -1 unless (-e $file1 && -e $file2);
746 if ($file1name ne $file2name) {
747 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
748 return 1;
749 }
750
[8716]751 my @file1stat = stat ($file1);
752 my @file2stat = stat ($file2);
[4]753
754 if (-d $file1) {
755 if (! -d $file2) {
756 print STDERR "one file is a directory\n" if ($verbosity >= 2);
757 return 1;
758 }
759 return 0;
760 }
761
762 # both must be regular files
763 unless (-f $file1 && -f $file2) {
764 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
765 return 1;
766 }
767
768 # the size of the files must be the same
769 if ($file1stat[7] != $file2stat[7]) {
770 print STDERR "different sized files\n" if ($verbosity >= 2);
771 return 1;
772 }
773
774 # the second file cannot be older than the first
775 if ($file1stat[9] > $file2stat[9]) {
776 print STDERR "file is older\n" if ($verbosity >= 2);
777 return 1;
778 }
779
780 return 0;
781}
782
783
[16266]784sub get_tmp_filename
785{
786 my $file_ext = shift(@_) || undef;
787
[22438]788 my $opt_dot_file_ext = "";
789 if (defined $file_ext) {
790 if ($file_ext !~ m/\./) {
791 # no dot, so needs one added in at start
792 $opt_dot_file_ext = ".$file_ext"
793 }
794 else {
795 # allow for "extensions" such as _metadata.txt to be handled
796 # gracefully
797 $opt_dot_file_ext = $file_ext;
798 }
799 }
[16266]800
[2795]801 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
[4]802 &mk_all_dir ($tmpdir) unless -e $tmpdir;
803
804 my $count = 1000;
805 my $rand = int(rand $count);
[16266]806 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
807
808 while (-e $full_tmp_filename) {
[4]809 $rand = int(rand $count);
[16266]810 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
[4]811 $count++;
812 }
[16266]813
814 return $full_tmp_filename;
[4]815}
816
[22886]817sub get_timestamped_tmp_folder
[22873]818{
819
[22886]820 my $tmp_dirname;
[22873]821 if(defined $ENV{'GSDLCOLLECTDIR'}) {
822 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
823 } elsif(defined $ENV{'GSDLHOME'}) {
824 $tmp_dirname = $ENV{'GSDLHOME'};
[22886]825 } else {
826 return undef;
[22873]827 }
828
829 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
830 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
831
832 # add the timestamp into the path otherwise we can run into problems
833 # if documents have the same name
834 my $timestamp = time;
835 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
836 $tmp_dirname = $time_tmp_dirname;
837 my $i = 1;
838 while (-e $tmp_dirname) {
839 $tmp_dirname = "$time_tmp_dirname$i";
840 $i++;
841 }
842 &util::mk_dir($tmp_dirname);
843
[22886]844 return $tmp_dirname;
845}
[22873]846
[22886]847sub get_timestamped_tmp_filename_in_collection
848{
849
850 my ($input_filename, $output_ext) = @_;
851 # derive tmp filename from input filename
852 my ($tailname, $dirname, $suffix)
853 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
854
855 # softlink to collection tmp dir
856 my $tmp_dirname = &util::get_timestamped_tmp_folder();
857 $tmp_dirname = $dirname unless defined $tmp_dirname;
858
[22873]859 # following two steps copied from ConvertBinaryFile
[22886]860 # do we need them?? can't use them as is, as they use plugin methods.
861
[22873]862 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
863
864 # URLEncode this since htmls with images where the html filename is utf8 don't seem
865 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
866 # files on the filesystem.
867 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
868 if (defined $output_ext) {
869 $output_ext = ".$output_ext"; # add the dot
870 } else {
871 $output_ext = $suffix;
872 }
873 $output_ext= lc($output_ext);
874 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
875
876 return $tmp_filename;
877}
878
[21218]879sub get_toplevel_tmp_dir
880{
881 return filename_cat($ENV{'GSDLHOME'}, "tmp");
882}
883
884
[17512]885sub filename_to_regex {
886 my $filename = shift (@_);
[4]887
[24971]888 # need to make single backslashes double so that regex works
[24832]889 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
[24829]890
[24832]891 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
892 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
[24829]893 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
894 $filename =~ s@\(@\\(@g; # escape brackets
895 $filename =~ s@\)@\\)@g; # escape brackets
[24932]896 $filename =~ s@\[@\\[@g; # escape brackets
897 $filename =~ s@\]@\\]@g; # escape brackets
[24829]898
[17512]899 return $filename;
900}
901
[24829]902sub unregex_filename {
903 my $filename = shift (@_);
904
905 # need to put doubled backslashes for regex back to single
906 $filename =~ s/\\\./\./g; # remove RE syntax for .
907 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
908 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
[24932]909 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
910 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
[24940]911
912 # \\ goes to \
913 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
914 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
[24829]915 return $filename;
916}
917
[4]918sub filename_cat {
[7507]919 my $first_file = shift(@_);
[4]920 my (@filenames) = @_;
[10146]921
[16266]922# Useful for debugging
923# -- might make sense to call caller(0) rather than (1)??
924# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
[22856]925# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
[18913]926
927 # If first_file is not null or empty, then add it back into the list
928 if (defined $first_file && $first_file =~ /\S/) {
[7507]929 unshift(@filenames, $first_file);
930 }
931
[4]932 my $filename = join("/", @filenames);
933
934 # remove duplicate slashes and remove the last slash
[488]935 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
936 $filename =~ s/[\\\/]+/\\/g;
937 } else {
[836]938 $filename =~ s/[\/]+/\//g;
939 # DB: want a filename abc\de.html to remain like this
[488]940 }
941 $filename =~ s/[\\\/]$//;
[4]942
943 return $filename;
944}
945
[21413]946
947sub pathname_cat {
948 my $first_path = shift(@_);
949 my (@pathnames) = @_;
950
951 # If first_path is not null or empty, then add it back into the list
952 if (defined $first_path && $first_path =~ /\S/) {
953 unshift(@pathnames, $first_path);
954 }
955
[21425]956 my $join_char;
[21413]957 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
958 $join_char = ";";
959 } else {
960 $join_char = ":";
961 }
962
963 my $pathname = join($join_char, @pathnames);
964
965 # remove duplicate slashes
966 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
967 $pathname =~ s/[\\\/]+/\\/g;
968 } else {
969 $pathname =~ s/[\/]+/\//g;
970 # DB: want a pathname abc\de.html to remain like this
971 }
972
973 return $pathname;
974}
975
976
[19616]977sub tidy_up_oid {
978 my ($OID) = @_;
979 if ($OID =~ /\./) {
980 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
981 $OID =~ s/\.//g; #remove any periods
982 }
983 if ($OID =~ /^\s.*\s$/) {
984 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
985 # remove starting and trailing whitespace
986 $OID =~ s/^\s+//;
987 $OID =~ s/\s+$//;
988 }
989 if ($OID =~ /^[\d]*$/) {
990 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
991 $OID = "D" . $OID;
992 }
993
994 return $OID;
995}
[10212]996sub envvar_prepend {
997 my ($var,$val) = @_;
998
[16404]999 # do not prepend any value/path that's already in the environment variable
[24832]1000
1001 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1002 if (!defined($ENV{$var})) {
1003 $ENV{$var} = "$val";
[16442]1004 }
[24832]1005 elsif($ENV{$var} !~ m/$escaped_val/) {
1006 $ENV{$var} = "$val;".$ENV{$var};
[10212]1007 }
1008}
1009
1010sub envvar_append {
1011 my ($var,$val) = @_;
[24832]1012
[16404]1013 # do not append any value/path that's already in the environment variable
[24832]1014
1015 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1016 if (!defined($ENV{$var})) {
1017 $ENV{$var} = "$val";
[16442]1018 }
[24832]1019 elsif($ENV{$var} !~ m/$escaped_val/) {
1020 $ENV{$var} .= ";$val";
1021 }
[10212]1022}
1023
[16442]1024
[16380]1025# splits a filename into a prefix and a tail extension using the tail_re, or
1026# if that fails, splits on the file_extension . (dot)
1027sub get_prefix_and_tail_by_regex {
[10212]1028
[16380]1029 my ($filename,$tail_re) = @_;
1030
1031 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
1032 if ((!defined $file_prefix) || (!defined $file_ext)) {
1033 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
1034 }
1035
1036 return ($file_prefix,$file_ext);
1037}
1038
1039# get full path and file only path from a base_dir (which may be empty) and
1040# file (which may contain directories)
1041sub get_full_filenames {
1042 my ($base_dir, $file) = @_;
1043
1044 my $filename_full_path = $file;
1045 # add on directory if present
1046 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
1047
1048 my $filename_no_path = $file;
1049
1050 # remove directory if present
1051 $filename_no_path =~ s/^.*[\/\\]//;
1052 return ($filename_full_path, $filename_no_path);
1053}
1054
[8682]1055# returns the path of a file without the filename -- ie. the directory the file is in
1056sub filename_head {
1057 my $filename = shift(@_);
1058
1059 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1060 $filename =~ s/[^\\\\]*$//;
1061 }
1062 else {
1063 $filename =~ s/[^\\\/]*$//;
1064 }
1065
1066 return $filename;
1067}
1068
1069
[23362]1070
[1454]1071# returns 1 if filename1 and filename2 point to the same
1072# file or directory
1073sub filenames_equal {
1074 my ($filename1, $filename2) = @_;
1075
1076 # use filename_cat to clean up trailing slashes and
1077 # multiple slashes
1078 $filename1 = filename_cat ($filename1);
[2516]1079 $filename2 = filename_cat ($filename2);
[1454]1080
1081 # filenames not case sensitive on windows
1082 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1083 $filename1 =~ tr/[A-Z]/[a-z]/;
1084 $filename2 =~ tr/[A-Z]/[a-z]/;
1085 }
1086 return 1 if $filename1 eq $filename2;
1087 return 0;
1088}
1089
[24932]1090# If filename is relative to within_dir, returns the relative path of filename to that directory
1091# with slashes in the filename returned as they were in the original (absolute) filename.
[23362]1092sub filename_within_directory
1093{
1094 my ($filename,$within_dir) = @_;
1095
[23371]1096 if ($within_dir !~ m/[\/\\]$/) {
1097 my $dirsep = &util::get_dirsep();
[23362]1098 $within_dir .= $dirsep;
1099 }
1100
[24829]1101 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
[23362]1102 if ($filename =~ m/^$within_dir(.*)$/) {
1103 $filename = $1;
1104 }
1105
1106 return $filename;
1107}
1108
[24932]1109# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
1110# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
1111# The subpath returned will also be a URL type filename.
1112sub filename_within_directory_url_format
1113{
1114 my ($filename,$within_dir) = @_;
1115
1116 # convert parameters only to / slashes if Windows
1117
[24971]1118 my $filename_urlformat = &filepath_to_url_format($filename);
1119 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
1120
[24932]1121 #if ($within_dir_urlformat !~ m/\/$/) {
1122 # make sure directory ends with a slash
1123 #$within_dir_urlformat .= "/";
1124 #}
1125
1126 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
1127
1128 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
1129
1130 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
1131 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
1132 $filename_urlformat = $1;
1133 }
1134
1135 return $filename_urlformat;
1136}
1137
[24971]1138# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
1139# since on Linux it doesn't represent a file separator but an escape char).
1140sub filepath_to_url_format
1141{
1142 my ($filepath) = @_;
1143 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1144 # Only need to worry about Windows, as Unix style directories already in url-format
1145 # Convert Windows style \ => /
1146 $filepath =~ s@\\@/@g;
1147 }
1148 return $filepath;
1149}
[24932]1150
[25093]1151# regex filepaths on windows may include \\ as path separator. Convert \\ to /
1152sub filepath_regex_to_url_format
1153{
1154 my ($filepath) = @_;
1155 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1156 # Only need to worry about Windows, as Unix style directories already in url-format
1157 # Convert Windows style \\ => /
1158 $filepath =~ s@\\\\@/@g;
1159 }
1160 return $filepath;
1161
1162}
[24971]1163
[25093]1164# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
1165# and ignores trailing /
1166# returns (file, dirs) dirs will be empty if no subdirs
1167sub url_fileparse
1168{
1169 my ($filepath) = @_;
1170 # remove trailing /
1171 $filepath =~ s@/$@@;
1172 if ($filepath !~ m@/@) {
1173 return ($filepath, "");
1174 }
1175 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
1176 return ($file, $dirs);
1177
1178}
1179
1180
[10281]1181sub filename_within_collection
1182{
1183 my ($filename) = @_;
1184
1185 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1186
1187 if (defined $collect_dir) {
[23362]1188
[15875]1189 # if from within GSDLCOLLECTDIR, then remove directory prefix
1190 # so source_filename is realative to it. This is done to aid
1191 # portability, i.e. the collection can be moved to somewhere
1192 # else on the file system and the archives directory will still
1193 # work. This is needed, for example in the applet version of
1194 # GLI where GSDLHOME/collect on the server will be different to
1195 # the collect directory of the remove user. Of course,
1196 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1197 # it back into a full pathname.
[23362]1198
1199 $filename = filename_within_directory($filename,$collect_dir);
[10281]1200 }
1201
1202 return $filename;
1203}
1204
[23362]1205sub prettyprint_file
1206{
[23484]1207 my ($base_dir,$file,$gli) = @_;
[23362]1208
1209 my $filename_full_path = &util::filename_cat($base_dir,$file);
1210
1211 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1212 require Win32;
1213
1214 # For some reason base_dir in the form c:/a/b/c
1215 # This leads to confusion later on, so turn it back into
1216 # the more usual Windows form
1217 $base_dir =~ s/\//\\/g;
1218 my $long_base_dir = Win32::GetLongPathName($base_dir);
1219 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1220
1221 $file = filename_within_directory($long_full_path,$long_base_dir);
[23484]1222 $file = encode("utf8",$file) if ($gli);
[23362]1223 }
1224
1225 return $file;
1226}
1227
1228
1229sub upgrade_if_dos_filename
1230{
[23371]1231 my ($filename_full_path,$and_encode) = @_;
[23362]1232
1233 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1234 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1235 # to its long (Windows) version
[23416]1236 my $long_filename = Win32::GetLongPathName($filename_full_path);
1237 if (defined $long_filename) {
1238 $filename_full_path = $long_filename;
1239 }
[23362]1240 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
[23483]1241 $filename_full_path =~ s/^(.):/\u$1:/;
[23371]1242 if ((defined $and_encode) && ($and_encode)) {
1243 $filename_full_path = encode("utf8",$filename_full_path);
1244 }
[23362]1245 }
1246
1247 return $filename_full_path;
1248}
1249
1250
[23388]1251sub downgrade_if_dos_filename
1252{
1253 my ($filename_full_path) = @_;
1254
1255 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1256 require Win32;
1257
1258 # Ensure the given long Windows filename is in a form that can
1259 # be opened by Perl => convert it to a short DOS-like filename
1260
[23414]1261 my $short_filename = Win32::GetShortPathName($filename_full_path);
1262 if (defined $short_filename) {
1263 $filename_full_path = $short_filename;
1264 }
[23416]1265 # Make sure initial drive letter is lower-case (to fit in
1266 # with rest of Greenstone)
[23483]1267 $filename_full_path =~ s/^(.):/\u$1:/;
[23388]1268 }
1269
1270 return $filename_full_path;
1271}
1272
[23561]1273sub block_filename
1274{
1275 my ($block_hash,$filename) = @_;
1276
1277 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1278
1279 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1280 my $lower_filename = lc($filename);
1281 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1282# my $lower_drive = $filename;
1283# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1284
1285# my $upper_drive = $filename;
1286# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1287#
1288# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1289# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
1290 }
1291 else {
1292 $block_hash->{'file_blocks'}->{$filename} = 1;
1293 }
1294}
[23388]1295
[23561]1296
[18441]1297sub filename_is_absolute
1298{
1299 my ($filename) = @_;
1300
1301 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1302 return ($filename =~ m/^(\w:)?\\/);
1303 }
1304 else {
1305 return ($filename =~ m/^\//);
1306 }
1307}
1308
1309
[17572]1310## @method make_absolute()
1311#
1312# Ensure the given file path is absolute in respect to the given base path.
1313#
1314# @param $base_dir A string denoting the base path the given dir must be
1315# absolute to.
1316# @param $dir The directory to be made absolute as a string. Note that the
1317# dir may already be absolute, in which case it will remain
1318# unchanged.
1319# @return The now absolute form of the directory as a string.
1320#
1321# @author John Thompson, DL Consulting Ltd.
1322# @copy 2006 DL Consulting Ltd.
1323#
1324#used in buildcol.pl, doesn't work for all cases --kjdon
1325sub make_absolute {
1326
1327 my ($base_dir, $dir) = @_;
[18441]1328### print STDERR "dir = $dir\n";
[17572]1329 $dir =~ s/[\\\/]+/\//g;
1330 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1331 $dir =~ s|^/tmp_mnt||;
1332 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1333 $dir =~ s|/[.][.]?/|/|g;
1334 $dir =~ tr|/|/|s;
[18441]1335### print STDERR "dir = $dir\n";
[17572]1336
1337 return $dir;
1338}
1339## make_absolute() ##
[10281]1340
[7929]1341sub get_dirsep {
1342
1343 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1344 return "\\";
1345 } else {
1346 return "\/";
1347 }
1348}
1349
[619]1350sub get_os_dirsep {
[4]1351
[619]1352 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1353 return "\\\\";
1354 } else {
1355 return "\\\/";
1356 }
1357}
1358
1359sub get_re_dirsep {
1360
1361 return "\\\\|\\\/";
1362}
1363
1364
[15003]1365sub get_dirsep_tail {
1366 my ($filename) = @_;
1367
1368 # returns last part of directory or filename
1369 # On unix e.g. a/b.d => b.d
1370 # a/b/c => c
1371
[15088]1372 my $dirsep = get_re_dirsep();
1373 my @dirs = split (/$dirsep/, $filename);
1374 my $tail = pop @dirs;
[15003]1375
[15088]1376 # - caused problems under windows
1377 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1378
[15003]1379 return $tail;
1380}
1381
1382
[4]1383# if this is running on windows we want binaries to end in
1384# .exe, otherwise they don't have to end in any extension
1385sub get_os_exe {
1386 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1387 return "";
1388}
1389
1390
[86]1391# test to see whether this is a big or little endian machine
[15713]1392sub is_little_endian
1393{
1394 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1395 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1396 # Otherwise, it's little endian
1397
1398 #return 0 if $^O =~ /^darwin$/i;
[17714]1399 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1400
1401 # Going back to stating exactly whether the machine is little endian
1402 # or big endian, without any special case for Macs. Since for rata it comes
1403 # back with little endian and for shuttle with bigendian.
[15713]1404 return (ord(substr(pack("s",1), 0, 1)) == 1);
[86]1405}
[4]1406
[86]1407
[135]1408# will return the collection name if successful, "" otherwise
1409sub use_collection {
[1454]1410 my ($collection, $collectdir) = @_;
[135]1411
[1454]1412 if (!defined $collectdir || $collectdir eq "") {
1413 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1414 }
1415
[135]1416 # get and check the collection
1417 if (!defined($collection) || $collection eq "") {
1418 if (defined $ENV{'GSDLCOLLECTION'}) {
1419 $collection = $ENV{'GSDLCOLLECTION'};
1420 } else {
[2359]1421 print STDOUT "No collection specified\n";
[135]1422 return "";
1423 }
1424 }
1425
1426 if ($collection eq "modelcol") {
[2359]1427 print STDOUT "You can't use modelcol.\n";
[135]1428 return "";
1429 }
1430
1431 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1432 # are defined
[17204]1433 $ENV{'GSDLCOLLECTION'} = $collection;
[1454]1434 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
[135]1435
1436 # make sure this collection exists
1437 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
[2359]1438 print STDOUT "Invalid collection ($collection).\n";
[135]1439 return "";
1440 }
1441
1442 # everything is ready to go
1443 return $collection;
1444}
1445
[21207]1446sub get_current_collection_name {
1447 return $ENV{'GSDLCOLLECTION'};
1448}
[14926]1449
1450
1451# will return the collection name if successful, "" otherwise.
1452# Like use_collection (above) but for greenstone 3 (taking account of site level)
1453
1454sub use_site_collection {
1455 my ($site, $collection, $collectdir) = @_;
1456
1457 if (!defined $collectdir || $collectdir eq "") {
1458 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1459 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1460 }
1461
1462 # collectdir explicitly set by this point (using $site variable if required).
1463 # Can call "old" gsdl2 use_collection now.
1464
1465 return use_collection($collection,$collectdir);
1466}
1467
1468
1469
[15018]1470sub locate_config_file
1471{
1472 my ($file) = @_;
1473
1474 my $locations = locate_config_files($file);
1475
1476 return shift @$locations; # returns undef if 'locations' is empty
1477}
1478
1479
1480sub locate_config_files
1481{
1482 my ($file) = @_;
1483
1484 my @locations = ();
1485
1486 if (-e $file) {
1487 # Clearly specified (most likely full filename)
1488 # No need to hunt in 'etc' directories, return value unchanged
1489 push(@locations,$file);
1490 }
1491 else {
1492 # Check for collection specific one before looking in global GSDL 'etc'
[16969]1493 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1494 my $test_collect_etc_filename
1495 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1496
1497 if (-e $test_collect_etc_filename) {
1498 push(@locations,$test_collect_etc_filename);
1499 }
[15018]1500 }
1501 my $test_main_etc_filename
1502 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1503 if (-e $test_main_etc_filename) {
1504 push(@locations,$test_main_etc_filename);
1505 }
1506 }
1507
1508 return \@locations;
1509}
1510
1511
[9955]1512sub hyperlink_text
1513{
1514 my ($text) = @_;
1515
1516 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1517 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1518
1519 return $text;
1520}
1521
1522
[16436]1523# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1524# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1525sub is_dir_empty
1526{
1527 my ($path) = @_;
1528 opendir DIR, $path;
1529 while(my $entry = readdir DIR) {
1530 next if($entry =~ /^\.\.?$/);
1531 closedir DIR;
1532 return 0;
1533 }
1534 closedir DIR;
1535 return 1;
1536}
1537
[18337]1538# Returns the given filename converted using either URL encoding or base64
1539# encoding, as specified by $rename_method. If the given filename has no suffix
[20413]1540# (if it is just the tailname), then $no_suffix should be some defined value.
1541# rename_method can be url, none, base64
[18319]1542sub rename_file {
[18337]1543 my ($filename, $rename_method, $no_suffix) = @_;
[18329]1544
[18337]1545 if(!$filename) { # undefined or empty string
[18329]1546 return $filename;
1547 }
[18319]1548
[20413]1549 if (!$rename_method) {
1550 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1551 # Debugging information
[22856]1552 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1553 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
[20413]1554 $rename_method = "url";
1555 } elsif($rename_method eq "none") {
1556 return $filename; # would have already been renamed
1557 }
1558
[19762]1559 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1560 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1561 ###$filename =~ s/ /_/g;
[18337]1562
1563 my ($tailname,$dirname,$suffix);
1564 if($no_suffix) { # given a tailname, no suffix
1565 ($tailname,$dirname) = File::Basename::fileparse($filename);
1566 }
1567 else {
1568 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1569 }
[23388]1570 if (!$suffix) {
1571 $suffix = "";
1572 }
1573 else {
1574 $suffix = lc($suffix);
1575 }
[18337]1576
[20413]1577 if ($rename_method eq "url") {
[18319]1578 $tailname = &unicode::url_encode($tailname);
1579 }
1580 elsif ($rename_method eq "base64") {
[18341]1581 $tailname = &unicode::base64_encode($tailname);
[18319]1582 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1583 }
[18326]1584
[18319]1585 $filename = "$tailname$suffix";
[18326]1586 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
[18319]1587
1588 return $filename;
1589}
1590
[21616]1591
1592# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
[21664]1593sub rename_ldb_or_bdb_file {
[18657]1594 my ($filename_no_ext) = @_;
1595
1596 my $new_filename = "$filename_no_ext.gdb";
[21615]1597 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
[18657]1598 # try ldb
1599 my $old_filename = "$filename_no_ext.ldb";
1600
1601 if (-f $old_filename) {
[19056]1602 print STDERR "Renaming $old_filename to $new_filename\n";
1603 rename ($old_filename, $new_filename)
1604 || print STDERR "Rename failed: $!\n";
[18657]1605 return;
1606 }
1607 # try bdb
1608 $old_filename = "$filename_no_ext.bdb";
1609 if (-f $old_filename) {
[19056]1610 print STDERR "Renaming $old_filename to $new_filename\n";
1611 rename ($old_filename, $new_filename)
1612 || print STDERR "Rename failed: $!\n";
[18657]1613 return;
1614 }
1615}
1616
[24874]1617sub os_dir() {
1618
1619 my $gsdlarch = "";
1620 if(defined $ENV{'GSDLARCH'}) {
1621 $gsdlarch = $ENV{'GSDLARCH'};
1622 }
1623 return $ENV{'GSDLOS'}.$gsdlarch;
1624}
[18657]1625
[21719]1626# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1627# By default, /greenstone3 for GS3 or /greenstone for GS2.
1628sub get_greenstone_url_prefix() {
1629 # if already set on a previous occasion, just return that
1630 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1631 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
[18657]1632
[21719]1633 my ($configfile, $urlprefix, $defaultUrlprefix);
1634 my @propertynames = ();
1635
1636 if($ENV{'GSDL3SRCHOME'}) {
1637 $defaultUrlprefix = "/greenstone3";
1638 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1639 push(@propertynames, qw/path\s*\=/);
1640 } else {
1641 $defaultUrlprefix = "/greenstone";
[24874]1642 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
[21719]1643 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1644 }
1645
1646 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1647
1648 if(!$urlprefix) { # no values found for URL prefix, use default values
1649 $urlprefix = $defaultUrlprefix;
1650 } else {
1651 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1652 $urlprefix =~ s/^\///; # remove the starting slash
1653 my @dirs = split(/(\\|\/)/, $urlprefix);
1654 $urlprefix = shift(@dirs);
1655
1656 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1657 $urlprefix = "/$urlprefix";
1658 }
1659 }
1660
1661 # set for the future
1662 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1663# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1664 return $urlprefix;
1665}
1666
1667
1668# Given a config file (xml or java properties file) and a list/array of regular expressions
1669# that represent property names to match on, this function will return the value for the 1st
1670# matching property name. If the return value is undefined, no matching property was found.
1671sub extract_propvalue_from_file() {
1672 my ($configfile, $propertynames) = @_;
1673
1674 my $value;
1675 unless(open(FIN, "<$configfile")) {
1676 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1677 return $value; # not initialised
1678 }
1679
1680 # Read the entire file at once, as one single line, then close it
1681 my $filecontents;
1682 {
1683 local $/ = undef;
1684 $filecontents = <FIN>;
1685 }
1686 close(FIN);
1687
1688 foreach my $regex (@$propertynames) {
1689 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1690 if($value) {
1691 $value =~ s/^\"//; # remove any startquotes
1692 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1693 last; # found value for a matching property, break from loop
1694 }
1695 }
1696
1697 return $value;
1698}
1699
[23306]1700# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1701# given that perllib is in @INC in order to invoke this subroutine.
1702# Call as follows -- after setting up INC to include perllib and
1703# after setting up GSDLHOME and GSDLOS:
1704#
1705# require util;
1706# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1707#
1708sub setup_greenstone_env() {
1709 my ($GSDLHOME, $GSDLOS) = @_;
1710
1711 #my %env_map = ();
1712 # Get the localised ENV settings of running a localised source setup.bash
[23314]1713 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1714 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1715 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
[23306]1716 if($GSDLOS =~ m/windows/i) {
[23314]1717 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1718 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
[23306]1719 }
1720 if (!open(PIN, "$perl_command |")) {
1721 print STDERR ("Unable to execute command: $perl_command. $!\n");
[24563]1722 }
[23306]1723
1724 while (defined (my $perl_output_line = <PIN>)) {
1725 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1726 #$env_map{$key}=$value;
1727 $ENV{$key}=$value;
1728 }
[24563]1729 close (PIN);
1730
[23306]1731 # If any keys in $ENV don't occur in Greenstone's localised env
1732 # (stored in $env_map), delete those entries from $ENV
1733 #foreach $key (keys %ENV) {
1734 # if(!defined $env_map{$key}) {
1735 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1736 # delete $ENV{$key}; # del $ENV(key, value) pair
1737 # }
1738 #}
1739 #undef %env_map;
1740}
1741
[24362]1742sub get_perl_exec() {
1743 my $perl_exec = $^X; # may return just "perl"
1744
1745 if($ENV{'PERLPATH'}) {
1746 # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1747 if($ENV{'GSDLOS'} =~ m/windows/) {
1748 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1749 } else {
1750 $perl_exec = "$ENV{'PERLPATH'}/perl";
1751 }
1752 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1753 # containing the full path to the current perl executable we're using
1754 $perl_exec = $Config{perlpath}; # configured path for perl
1755 if (!-e $perl_exec) { # may not point to location on this machine
1756 $perl_exec = $^X; # may return just "perl"
1757 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1758 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1759 }
1760 }
1761 }
1762
1763 return $perl_exec;
1764}
1765
[25533]1766# returns the path to the java command in the JRE included with GS (if any),
1767# quoted to safeguard any spaces in this path, otherwise a simple java
1768# command is returned which assumes and will try for a system java.
[25512]1769sub get_java_command {
1770 my $java = "java";
1771 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1772 # after running setup.bat or from GLI which also runs setup.bat
1773 my $java_bin = &util::filename_cat($ENV{'GSDLHOME'},"packages","jre","bin");
1774 if(-d $java_bin) {
1775 $java = &util::filename_cat($java_bin,"java");
[25533]1776 $java = "\"".$java."\""; # quoted to preserve spaces in path
[25512]1777 }
1778 }
1779 return $java;
1780}
[24362]1781
[25512]1782
[25577]1783# Given the qualified collection name (colgroup/collection),
1784# returns the collection and colgroup parts
1785sub get_collection_parts {
1786 # http://perldoc.perl.org/File/Basename.html
1787 # my($filename, $directories, $suffix) = fileparse($path);
1788 # "$directories contains everything up to and including the last directory separator in the $path
1789 # including the volume (if applicable). The remainder of the $path is the $filename."
1790 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);
1791
1792 my $qualified_collection = shift(@_);
1793
1794 # Since activate.pl can be launched from the command-line, including by a user,
1795 # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1796 # Also allow for the accidental inclusion of multiple slashes
1797 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1798
1799 if(!defined $collection) {
1800 $collection = $colgroup;
1801 $colgroup = "";
1802 }
1803 return ($collection, $colgroup);
1804}
1805
1806# work out the "collectdir/collection" location
1807sub resolve_collection_dir {
1808 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1809
1810 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);
1811
1812 if (defined $collect_dir) {
1813 return &util::filename_cat($collect_dir,$colgroup, $collection);
1814 }
1815 else {
1816 if (defined $site) {
1817 return &util::filename_cat($ENV{'GSDL3HOME'},"sites",$site,"collect",$colgroup, $collection);
1818 }
1819 else {
1820 return &util::filename_cat($ENV{'GSDLHOME'},"collect",$colgroup, $collection);
1821 }
1822 }
1823}
1824
[4]18251;
Note: See TracBrowser for help on using the repository browser.