source: main/trunk/greenstone2/perllib/util.pm@ 26850

Last change on this file since 26850 was 26206, checked in by ak19, 12 years ago

Fixes to get Remote Greenstone 3 working with client-gli: 1. client-GLI should not start the local GS3 server, since client-GLI will be running against a remote server. 2. The encryption process for authentication had been changed for GS3, so now Authentication.java has a main function which is invoked by gliserver's gsdlCGI.pm to encrypt the password. 4. UsersDB when converted to txt for parsing by gliserver.pl has a different structure, so gliserver.pl needs to take that into account. 5. util.pm's functions for prepending and appending to environment variables needs to use an OS dependant path separator. This was not noticed when testing the remote GS server on 32 bit linux so far, but the windows style path separator (semicolon) used so far didn't work on the 64 bit linux test machine.

  • Property svn:keywords set to Author Date Id Revision
File size: 54.7 KB
RevLine 
[537]1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package util;
27
[23362]28use strict;
29
30use Encode;
[4]31use File::Copy;
[619]32use File::Basename;
[24362]33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
[4]38
39# removes files (but not directories)
40sub rm {
41 my (@files) = @_;
[18469]42
[4]43 my @filefiles = ();
44
45 # make sure the files we want to delete exist
46 # and are regular files
[10046]47 foreach my $file (@files) {
[4]48 if (!-e $file) {
49 print STDERR "util::rm $file does not exist\n";
[721]50 } elsif ((!-f $file) && (!-l $file)) {
51 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
[4]52 } else {
53 push (@filefiles, $file);
54 }
55 }
56
57 # remove the files
58 my $numremoved = unlink @filefiles;
59
60 # check to make sure all of them were removed
61 if ($numremoved != scalar(@filefiles)) {
62 print STDERR "util::rm Not all files were removed\n";
63 }
64}
65
[23249]66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine when debugging the deletion of individual files.
68sub rm_debug {
69 my (@files) = @_;
70 my @filefiles = ();
[4]71
[23249]72 # make sure the files we want to delete exist
73 # and are regular files
74 foreach my $file (@files) {
75 if (!-e $file) {
76 print STDERR "util::rm $file does not exist\n";
77 } elsif ((!-f $file) && (!-l $file)) {
78 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79 } else { # debug message
80 unlink($file) or warn "Could not delete file $file: $!\n";
81 }
82 }
83}
[10211]84
[23249]85
[4]86# recursive removal
[10211]87sub filtered_rm_r {
88 my ($files,$file_accept_re,$file_reject_re) = @_;
[4]89
[24291]90# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92# print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93
[10211]94 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
[4]96 # recursively remove the files
[10211]97 foreach my $file (@files_array) {
[4]98 $file =~ s/[\/\\]+$//; # remove trailing slashes
99
100 if (!-e $file) {
[10211]101 print STDERR "util::filtered_rm_r $file does not exist\n";
[4]102
[721]103 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
[4]104 # get the contents of this directory
105 if (!opendir (INDIR, $file)) {
[10211]106 print STDERR "util::filtered_rm_r could not open directory $file\n";
[4]107 } else {
108 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109 closedir (INDIR);
[10211]110
[4]111 # remove all the files in this directory
[10211]112 map {$_="$file/$_";} @filedir;
113 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
[4]114
[10211]115 if (!defined $file_accept_re && !defined $file_reject_re) {
116 # remove this directory
117 if (!rmdir $file) {
118 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119 }
[4]120 }
121 }
[10211]122 } else {
123 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
[4]124
[10211]125 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126 # remove this file
127 &rm ($file);
128 }
[4]129 }
130 }
131}
132
[10211]133
134# recursive removal
135sub rm_r {
136 my (@files) = @_;
137
138 # use the more general (but reterospectively written function
139 # filtered_rm_r function()
140
141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
[721]147# moves a file or a group of files
148sub mv {
149 my $dest = pop (@_);
150 my (@srcfiles) = @_;
[4]151
[721]152 # remove trailing slashes from source and destination files
153 $dest =~ s/[\\\/]+$//;
154 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156 # a few sanity checks
157 if (scalar (@srcfiles) == 0) {
158 print STDERR "util::mv no destination directory given\n";
159 return;
160 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161 print STDERR "util::mv if multiple source files are given the ".
162 "destination must be a directory\n";
163 return;
164 }
165
166 # move the files
[8716]167 foreach my $file (@srcfiles) {
[721]168 my $tempdest = $dest;
169 if (-d $tempdest) {
170 my ($filename) = $file =~ /([^\\\/]+)$/;
171 $tempdest .= "/$filename";
172 }
173 if (!-e $file) {
174 print STDERR "util::mv $file does not exist\n";
175 } else {
[25598]176 if(!rename ($file, $tempdest)) {
177 print STDERR "**** Failed to rename $file to $tempdest\n";
178 &File::Copy::copy($file, $tempdest);
179 &rm($file);
180 }
181 elsif(-e $file) { # rename (partially) succeeded) but srcfile still exists after rename
182 #print STDERR "*** srcfile $file still exists after rename to $tempdest\n";
183 if(!-e $tempdest) {
184 print STDERR "@@@@ ERROR: $tempdest does not exist\n";
185 }
186 # Sometimes the rename operation fails (as does File::Copy::move).
187 # This turns out to be because the files are hardlinked.
188 # Need to do a copy-delete in this case, however, the copy step is not necessary:
189 # the srcfile got renamed into tempdest, but srcfile itself still exists, delete it.
190 #&File::Copy::copy($file, $tempdest);
191
192 &rm($file);
193 }
[721]194 }
195 }
196}
197
[25554]198# Move the contents of source directory into target directory
199# (as opposed to merely replacing target dir with the src dir)
200# This can overwrite any files with duplicate names in the target
201# but other files and folders in the target will continue to exist
202sub mv_dir_contents {
203 my ($src_dir, $dest_dir) = @_;
204
205 # Obtain listing of all files within src_dir
206 # Note that readdir lists relative paths, as well as . and ..
207 opendir(DIR, "$src_dir");
208 my @files= readdir(DIR);
[25579]209 close(DIR);
[25554]210
[25572]211 my @full_path_files = ();
[25554]212 foreach my $file (@files) {
213 # process all except . and ..
[25572]214 unless($file eq "." || $file eq "..") {
[25578]215
[25579]216 my $dest_subdir = &filename_cat($dest_dir, $file); # $file is still a relative path
[25578]217
218 # construct absolute paths
219 $file = &filename_cat($src_dir, $file); # $file is now an absolute path
220
221 # Recurse on directories which have an equivalent in target dest_dir
222 # If $file is a directory that already exists in target $dest_dir,
223 # then a simple move operation will fail (definitely on Windows).
224 if(-d $file && -d $dest_subdir) {
[25579]225 #print STDERR "**** $file is a directory also existing in target, its contents to be copied to $dest_subdir\n";
[25578]226 &mv_dir_contents($file, $dest_subdir);
[25572]227
228 # now all content is moved across, delete empty dir in source folder
229 if(&is_dir_empty($file)) {
[25577]230 if (!rmdir $file) {
[25579]231 print STDERR "ERROR. util::mv_dir_contents couldn't remove directory $file\n";
[25577]232 }
[25572]233 } else { # error
[25598]234 print STDERR "ERROR. util::mv_dir_contents: subfolder $file still non-empty after moving contents to $dest_subdir\n";
[25572]235 }
[25578]236 } else { # process files and any directories that don't already exist with a simple move
[25572]237 push(@full_path_files, $file);
238 }
239 }
[25554]240 }
[25572]241
242 if(!&dir_exists($dest_dir)) { # create target toplevel folder or subfolders if they don't exist
243 &mk_dir($dest_dir);
244 }
[25598]245
246 #print STDERR "@@@@@ Copying files |".join(",", @full_path_files)."| to: $dest_dir\n";
247
[25572]248 if(@full_path_files) { # if non-empty, there's something to copy across
249 &mv(@full_path_files, $dest_dir);
250 }
[25554]251}
[721]252
[25554]253
[4]254# copies a file or a group of files
255sub cp {
256 my $dest = pop (@_);
257 my (@srcfiles) = @_;
258
259 # remove trailing slashes from source and destination files
260 $dest =~ s/[\\\/]+$//;
261 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
262
263 # a few sanity checks
264 if (scalar (@srcfiles) == 0) {
265 print STDERR "util::cp no destination directory given\n";
266 return;
267 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
268 print STDERR "util::cp if multiple source files are given the ".
269 "destination must be a directory\n";
270 return;
271 }
272
273 # copy the files
[8716]274 foreach my $file (@srcfiles) {
[4]275 my $tempdest = $dest;
276 if (-d $tempdest) {
277 my ($filename) = $file =~ /([^\\\/]+)$/;
278 $tempdest .= "/$filename";
279 }
280 if (!-e $file) {
281 print STDERR "util::cp $file does not exist\n";
282 } elsif (!-f $file) {
283 print STDERR "util::cp $file is not a plain file\n";
284 } else {
285 &File::Copy::copy ($file, $tempdest);
286 }
287 }
288}
289
290
[721]291
[4]292# recursively copies a file or group of files
[1454]293# syntax: cp_r (sourcefiles, destination directory)
294# destination must be a directory - to copy one file to
295# another use cp instead
[4]296sub cp_r {
297 my $dest = pop (@_);
298 my (@srcfiles) = @_;
299
300 # a few sanity checks
301 if (scalar (@srcfiles) == 0) {
[1454]302 print STDERR "util::cp_r no destination directory given\n";
[4]303 return;
[1454]304 } elsif (-f $dest) {
305 print STDERR "util::cp_r destination must be a directory\n";
[4]306 return;
307 }
308
[1454]309 # create destination directory if it doesn't exist already
310 if (! -d $dest) {
311 my $store_umask = umask(0002);
312 mkdir ($dest, 0777);
313 umask($store_umask);
314 }
315
[4]316 # copy the files
[8716]317 foreach my $file (@srcfiles) {
[4]318
319 if (!-e $file) {
[1454]320 print STDERR "util::cp_r $file does not exist\n";
[4]321
322 } elsif (-d $file) {
[1586]323 # make the new directory
324 my ($filename) = $file =~ /([^\\\/]*)$/;
325 $dest = &util::filename_cat ($dest, $filename);
326 my $store_umask = umask(0002);
327 mkdir ($dest, 0777);
328 umask($store_umask);
[836]329
[4]330 # get the contents of this directory
331 if (!opendir (INDIR, $file)) {
332 print STDERR "util::cp_r could not open directory $file\n";
333 } else {
[1454]334 my @filedir = readdir (INDIR);
[4]335 closedir (INDIR);
[8716]336 foreach my $f (@filedir) {
[1454]337 next if $f =~ /^\.\.?$/;
338 # copy all the files in this directory
339 my $ff = &util::filename_cat ($file, $f);
340 &cp_r ($ff, $dest);
341 }
[4]342 }
343
344 } else {
[1454]345 &cp($file, $dest);
[4]346 }
347 }
348}
[21762]349# recursively copies a file or group of files
350# syntax: cp_r (sourcefiles, destination directory)
351# destination must be a directory - to copy one file to
352# another use cp instead
353sub cp_r_nosvn {
354 my $dest = pop (@_);
355 my (@srcfiles) = @_;
[4]356
[21762]357 # a few sanity checks
358 if (scalar (@srcfiles) == 0) {
359 print STDERR "util::cp_r no destination directory given\n";
360 return;
361 } elsif (-f $dest) {
362 print STDERR "util::cp_r destination must be a directory\n";
363 return;
364 }
365
366 # create destination directory if it doesn't exist already
367 if (! -d $dest) {
368 my $store_umask = umask(0002);
369 mkdir ($dest, 0777);
370 umask($store_umask);
371 }
372
373 # copy the files
374 foreach my $file (@srcfiles) {
375
376 if (!-e $file) {
377 print STDERR "util::cp_r $file does not exist\n";
378
379 } elsif (-d $file) {
380 # make the new directory
381 my ($filename) = $file =~ /([^\\\/]*)$/;
382 $dest = &util::filename_cat ($dest, $filename);
383 my $store_umask = umask(0002);
384 mkdir ($dest, 0777);
385 umask($store_umask);
386
387 # get the contents of this directory
388 if (!opendir (INDIR, $file)) {
389 print STDERR "util::cp_r could not open directory $file\n";
390 } else {
391 my @filedir = readdir (INDIR);
392 closedir (INDIR);
393 foreach my $f (@filedir) {
394 next if $f =~ /^\.\.?$/;
395 next if $f =~ /^\.svn$/;
396 # copy all the files in this directory
397 my $ff = &util::filename_cat ($file, $f);
398 &cp_r ($ff, $dest);
399 }
400 }
401
402 } else {
403 &cp($file, $dest);
404 }
405 }
406}
407
[11179]408# copies a directory and its contents, excluding subdirectories, into a new directory
409sub cp_r_toplevel {
410 my $dest = pop (@_);
411 my (@srcfiles) = @_;
[4]412
[11179]413 # a few sanity checks
414 if (scalar (@srcfiles) == 0) {
415 print STDERR "util::cp_r no destination directory given\n";
416 return;
417 } elsif (-f $dest) {
418 print STDERR "util::cp_r destination must be a directory\n";
419 return;
420 }
421
422 # create destination directory if it doesn't exist already
423 if (! -d $dest) {
424 my $store_umask = umask(0002);
425 mkdir ($dest, 0777);
426 umask($store_umask);
427 }
428
429 # copy the files
430 foreach my $file (@srcfiles) {
431
432 if (!-e $file) {
433 print STDERR "util::cp_r $file does not exist\n";
434
435 } elsif (-d $file) {
436 # make the new directory
437 my ($filename) = $file =~ /([^\\\/]*)$/;
438 $dest = &util::filename_cat ($dest, $filename);
439 my $store_umask = umask(0002);
440 mkdir ($dest, 0777);
441 umask($store_umask);
442
443 # get the contents of this directory
444 if (!opendir (INDIR, $file)) {
445 print STDERR "util::cp_r could not open directory $file\n";
446 } else {
447 my @filedir = readdir (INDIR);
448 closedir (INDIR);
449 foreach my $f (@filedir) {
450 next if $f =~ /^\.\.?$/;
451
452 # copy all the files in this directory, but not directories
453 my $ff = &util::filename_cat ($file, $f);
454 if (-f $ff) {
455 &cp($ff, $dest);
456 #&cp_r ($ff, $dest);
457 }
458 }
459 }
460
461 } else {
462 &cp($file, $dest);
463 }
464 }
465}
466
[721]467sub mk_dir {
468 my ($dir) = @_;
469
[836]470 my $store_umask = umask(0002);
471 my $mkdir_ok = mkdir ($dir, 0777);
472 umask($store_umask);
473
474 if (!$mkdir_ok)
475 {
[721]476 print STDERR "util::mk_dir could not create directory $dir\n";
477 return;
478 }
479}
480
[1046]481# in case anyone cares - I did some testing (using perls Benchmark module)
482# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
483# slightly faster (surprisingly) - Stefan.
[4]484sub mk_all_dir {
485 my ($dir) = @_;
486
487 # use / for the directory separator, remove duplicate and
488 # trailing slashes
489 $dir=~s/[\\\/]+/\//g;
490 $dir=~s/[\\\/]+$//;
491
492 # make sure the cache directory exists
493 my $dirsofar = "";
494 my $first = 1;
[8716]495 foreach my $dirname (split ("/", $dir)) {
[4]496 $dirsofar .= "/" unless $first;
497 $first = 0;
498
499 $dirsofar .= $dirname;
500
501 next if $dirname =~ /^(|[a-z]:)$/i;
[836]502 if (!-e $dirsofar)
503 {
504 my $store_umask = umask(0002);
505 my $mkdir_ok = mkdir ($dirsofar, 0777);
506 umask($store_umask);
507 if (!$mkdir_ok)
508 {
509 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
510 return;
511 }
512 }
[4]513 }
514}
515
[619]516# make hard link to file if supported by OS, otherwise copy the file
517sub hard_link {
[18463]518 my ($src, $dest, $verbosity) = @_;
[4]519
[619]520 # remove trailing slashes from source and destination files
521 $src =~ s/[\\\/]+$//;
522 $dest =~ s/[\\\/]+$//;
523
[23307]524## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
[619]525 # a few sanity checks
[812]526 if (-e $dest) {
527 # destination file already exists
528 return;
529 }
530 elsif (!-e $src) {
[23307]531 print STDERR "util::hard_link source file \"$src\" does not exist\n";
[3628]532 return 1;
[619]533 }
534 elsif (-d $src) {
[23307]535 print STDERR "util::hard_link source \"$src\" is a directory\n";
[3628]536 return 1;
[619]537 }
538
539 my $dest_dir = &File::Basename::dirname($dest);
540 mk_all_dir($dest_dir) if (!-e $dest_dir);
541
[14365]542
[22119]543 if (!link($src, $dest)) {
[18463]544 if ((!defined $verbosity) || ($verbosity>2)) {
545 print STDERR "util::hard_link: unable to create hard link. ";
546 print STDERR " Copying file: $src -> $dest\n";
547 }
[14365]548 &File::Copy::copy ($src, $dest);
[619]549 }
[3628]550 return 0;
[619]551}
552
[2193]553# make soft link to file if supported by OS, otherwise copy file
[721]554sub soft_link {
[15165]555 my ($src, $dest, $ensure_paths_absolute) = @_;
[619]556
[721]557 # remove trailing slashes from source and destination files
558 $src =~ s/[\\\/]+$//;
559 $dest =~ s/[\\\/]+$//;
[619]560
[15165]561 # Ensure file paths are absolute IF requested to do so
562 # Soft_linking didn't work for relative paths
563 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
564 # We need to ensure that the src file is the absolute path
565 # See http://perldoc.perl.org/File/Spec.html
566 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
567 $src = File::Spec->rel2abs($src); # make absolute
568 }
569 # Might as well ensure that the destination file's absolute path is used
570 if(!File::Spec->file_name_is_absolute( $dest )) {
571 $dest = File::Spec->rel2abs($dest); # make absolute
572 }
573 }
574
[721]575 # a few sanity checks
576 if (!-e $src) {
577 print STDERR "util::soft_link source file $src does not exist\n";
578 return 0;
579 }
[619]580
[721]581 my $dest_dir = &File::Basename::dirname($dest);
582 mk_all_dir($dest_dir) if (!-e $dest_dir);
[14365]583
[2193]584 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
[23484]585
[14365]586 # symlink not supported on windows
587 &File::Copy::copy ($src, $dest);
[2193]588
589 } elsif (!eval {symlink($src, $dest)}) {
[2974]590 print STDERR "util::soft_link: unable to create soft link.\n";
[721]591 return 0;
592 }
593
594 return 1;
595}
596
[23362]597# Primarily for filenames generated by processing
598# content of HTML files (which are mapped to UTF-8 internally)
599#
600# To turn this into an octet string that really exists on the file
601# system:
602# 1. don't need to do anything special for Unix-based systems
603# (as underlying file system is byte-code)
604# 2. need to map to short DOS filenames for Windows
[721]605
[23362]606sub utf8_to_real_filename
607{
608 my ($utf8_filename) = @_;
[721]609
[23362]610 my $real_filename;
[721]611
[23362]612 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
613 require Win32;
[23388]614
615 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
616
[23362]617 my $unicode_filename = decode("utf8",$utf8_filename);
618 $real_filename = Win32::GetShortPathName($unicode_filename);
619 }
620 else {
621 $real_filename = $utf8_filename;
622 }
623
624 return $real_filename;
625}
626
627
628sub fd_exists
629{
630 my $filename_full_path = shift @_;
631 my $test_op = shift @_ || "-e";
632
633 # By default tests for existance of file or directory (-e)
634 # Can be made more specific by providing second parameter (e.g. -f or -d)
635
636 my $exists = 0;
637
638 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
639 require Win32;
640 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
641 if (!defined $filename_short_path) {
642 # Was probably still in UTF8 form (not what is needed on Windows)
643 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
644 if (defined $unicode_filename_full_path) {
645 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
646 }
647 }
648 $filename_full_path = $filename_short_path;
649 }
650
651 if (defined $filename_full_path) {
652 $exists = eval "($test_op \$filename_full_path)";
653 }
654
655 return $exists;
656}
657
658sub file_exists
659{
660 my ($filename_full_path) = @_;
661
662 return fd_exists($filename_full_path,"-f");
663}
664
665sub dir_exists
666{
667 my ($filename_full_path) = @_;
668
669 return fd_exists($filename_full_path,"-d");
670}
671
672
673
[4]674# updates a copy of a directory in some other part of the filesystem
675# verbosity settings are: 0=low, 1=normal, 2=high
676# both $fromdir and $todir should be absolute paths
677sub cachedir {
678 my ($fromdir, $todir, $verbosity) = @_;
679 $verbosity = 1 unless defined $verbosity;
680
681 # use / for the directory separator, remove duplicate and
682 # trailing slashes
683 $fromdir=~s/[\\\/]+/\//g;
684 $fromdir=~s/[\\\/]+$//;
685 $todir=~s/[\\\/]+/\//g;
686 $todir=~s/[\\\/]+$//;
687
688 &mk_all_dir ($todir);
689
690 # get the directories in ascending order
691 if (!opendir (FROMDIR, $fromdir)) {
692 print STDERR "util::cachedir could not read directory $fromdir\n";
693 return;
694 }
695 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
696 closedir (FROMDIR);
697
698 if (!opendir (TODIR, $todir)) {
699 print STDERR "util::cacedir could not read directory $todir\n";
700 return;
701 }
702 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
703 closedir (TODIR);
704
705 my $fromi = 0;
706 my $toi = 0;
707
708 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
709# print "fromi: $fromi toi: $toi\n";
710
711 # see if we should delete a file/directory
712 # this should happen if the file/directory
713 # is not in the from list or if its a different
714 # size, or has an older timestamp
715 if ($toi < scalar(@todir)) {
716 if (($fromi >= scalar(@fromdir)) ||
717 ($todir[$toi] lt $fromdir[$fromi] ||
718 ($todir[$toi] eq $fromdir[$fromi] &&
719 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
720 $verbosity)))) {
721
722 # the files are different
723 &rm_r("$todir/$todir[$toi]");
724 splice(@todir, $toi, 1); # $toi stays the same
725
726 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
727 # the files are the same
728 # if it is a directory, check its contents
729 if (-d "$todir/$todir[$toi]") {
730 &cachedir ("$fromdir/$fromdir[$fromi]",
731 "$todir/$todir[$toi]", $verbosity);
732 }
733
734 $toi++;
735 $fromi++;
736 next;
737 }
738 }
739
740 # see if we should insert a file/directory
741 # we should insert a file/directory if there
742 # is no tofiles left or if the tofile does not exist
743 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
744 $todir[$toi] gt $fromdir[$fromi])) {
745 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
746 splice (@todir, $toi, 0, $fromdir[$fromi]);
747
748 $toi++;
749 $fromi++;
750 }
751 }
752}
753
754# this function returns -1 if either file is not found
755# assumes that $file1 and $file2 are absolute file names or
756# in the current directory
757# $file2 is allowed to be newer than $file1
758sub differentfiles {
759 my ($file1, $file2, $verbosity) = @_;
760 $verbosity = 1 unless defined $verbosity;
761
762 $file1 =~ s/\/+$//;
763 $file2 =~ s/\/+$//;
764
765 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
766 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
767
768 return -1 unless (-e $file1 && -e $file2);
769 if ($file1name ne $file2name) {
770 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
771 return 1;
772 }
773
[8716]774 my @file1stat = stat ($file1);
775 my @file2stat = stat ($file2);
[4]776
777 if (-d $file1) {
778 if (! -d $file2) {
779 print STDERR "one file is a directory\n" if ($verbosity >= 2);
780 return 1;
781 }
782 return 0;
783 }
784
785 # both must be regular files
786 unless (-f $file1 && -f $file2) {
787 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
788 return 1;
789 }
790
791 # the size of the files must be the same
792 if ($file1stat[7] != $file2stat[7]) {
793 print STDERR "different sized files\n" if ($verbosity >= 2);
794 return 1;
795 }
796
797 # the second file cannot be older than the first
798 if ($file1stat[9] > $file2stat[9]) {
799 print STDERR "file is older\n" if ($verbosity >= 2);
800 return 1;
801 }
802
803 return 0;
804}
805
806
[16266]807sub get_tmp_filename
808{
809 my $file_ext = shift(@_) || undef;
810
[22438]811 my $opt_dot_file_ext = "";
812 if (defined $file_ext) {
813 if ($file_ext !~ m/\./) {
814 # no dot, so needs one added in at start
815 $opt_dot_file_ext = ".$file_ext"
816 }
817 else {
818 # allow for "extensions" such as _metadata.txt to be handled
819 # gracefully
820 $opt_dot_file_ext = $file_ext;
821 }
822 }
[16266]823
[2795]824 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
[4]825 &mk_all_dir ($tmpdir) unless -e $tmpdir;
826
827 my $count = 1000;
828 my $rand = int(rand $count);
[16266]829 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
830
831 while (-e $full_tmp_filename) {
[4]832 $rand = int(rand $count);
[16266]833 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
[4]834 $count++;
835 }
[16266]836
837 return $full_tmp_filename;
[4]838}
839
[22886]840sub get_timestamped_tmp_folder
[22873]841{
842
[22886]843 my $tmp_dirname;
[22873]844 if(defined $ENV{'GSDLCOLLECTDIR'}) {
845 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
846 } elsif(defined $ENV{'GSDLHOME'}) {
847 $tmp_dirname = $ENV{'GSDLHOME'};
[22886]848 } else {
849 return undef;
[22873]850 }
851
852 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
853 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
854
855 # add the timestamp into the path otherwise we can run into problems
856 # if documents have the same name
857 my $timestamp = time;
858 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
859 $tmp_dirname = $time_tmp_dirname;
860 my $i = 1;
861 while (-e $tmp_dirname) {
862 $tmp_dirname = "$time_tmp_dirname$i";
863 $i++;
864 }
865 &util::mk_dir($tmp_dirname);
866
[22886]867 return $tmp_dirname;
868}
[22873]869
[22886]870sub get_timestamped_tmp_filename_in_collection
871{
872
873 my ($input_filename, $output_ext) = @_;
874 # derive tmp filename from input filename
875 my ($tailname, $dirname, $suffix)
876 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
877
878 # softlink to collection tmp dir
879 my $tmp_dirname = &util::get_timestamped_tmp_folder();
880 $tmp_dirname = $dirname unless defined $tmp_dirname;
881
[22873]882 # following two steps copied from ConvertBinaryFile
[22886]883 # do we need them?? can't use them as is, as they use plugin methods.
884
[22873]885 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
886
887 # URLEncode this since htmls with images where the html filename is utf8 don't seem
888 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
889 # files on the filesystem.
890 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
891 if (defined $output_ext) {
892 $output_ext = ".$output_ext"; # add the dot
893 } else {
894 $output_ext = $suffix;
895 }
896 $output_ext= lc($output_ext);
897 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
898
899 return $tmp_filename;
900}
901
[21218]902sub get_toplevel_tmp_dir
903{
904 return filename_cat($ENV{'GSDLHOME'}, "tmp");
905}
906
907
[17512]908sub filename_to_regex {
909 my $filename = shift (@_);
[4]910
[24971]911 # need to make single backslashes double so that regex works
[24832]912 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
[24829]913
[24832]914 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
915 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
[24829]916 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
917 $filename =~ s@\(@\\(@g; # escape brackets
918 $filename =~ s@\)@\\)@g; # escape brackets
[24932]919 $filename =~ s@\[@\\[@g; # escape brackets
920 $filename =~ s@\]@\\]@g; # escape brackets
[24829]921
[17512]922 return $filename;
923}
924
[24829]925sub unregex_filename {
926 my $filename = shift (@_);
927
928 # need to put doubled backslashes for regex back to single
929 $filename =~ s/\\\./\./g; # remove RE syntax for .
930 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
931 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
[24932]932 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
933 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
[24940]934
935 # \\ goes to \
936 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
937 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
[24829]938 return $filename;
939}
940
[4]941sub filename_cat {
[7507]942 my $first_file = shift(@_);
[4]943 my (@filenames) = @_;
[10146]944
[16266]945# Useful for debugging
946# -- might make sense to call caller(0) rather than (1)??
947# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
[22856]948# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
[18913]949
950 # If first_file is not null or empty, then add it back into the list
951 if (defined $first_file && $first_file =~ /\S/) {
[7507]952 unshift(@filenames, $first_file);
953 }
954
[4]955 my $filename = join("/", @filenames);
956
957 # remove duplicate slashes and remove the last slash
[488]958 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
959 $filename =~ s/[\\\/]+/\\/g;
960 } else {
[836]961 $filename =~ s/[\/]+/\//g;
962 # DB: want a filename abc\de.html to remain like this
[488]963 }
964 $filename =~ s/[\\\/]$//;
[4]965
966 return $filename;
967}
968
[21413]969
970sub pathname_cat {
971 my $first_path = shift(@_);
972 my (@pathnames) = @_;
973
974 # If first_path is not null or empty, then add it back into the list
975 if (defined $first_path && $first_path =~ /\S/) {
976 unshift(@pathnames, $first_path);
977 }
978
[21425]979 my $join_char;
[21413]980 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
981 $join_char = ";";
982 } else {
983 $join_char = ":";
984 }
985
986 my $pathname = join($join_char, @pathnames);
987
988 # remove duplicate slashes
989 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
990 $pathname =~ s/[\\\/]+/\\/g;
991 } else {
992 $pathname =~ s/[\/]+/\//g;
993 # DB: want a pathname abc\de.html to remain like this
994 }
995
996 return $pathname;
997}
998
999
[19616]1000sub tidy_up_oid {
1001 my ($OID) = @_;
1002 if ($OID =~ /\./) {
1003 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
1004 $OID =~ s/\.//g; #remove any periods
1005 }
1006 if ($OID =~ /^\s.*\s$/) {
1007 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
1008 # remove starting and trailing whitespace
1009 $OID =~ s/^\s+//;
1010 $OID =~ s/\s+$//;
1011 }
1012 if ($OID =~ /^[\d]*$/) {
1013 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
1014 $OID = "D" . $OID;
1015 }
1016
1017 return $OID;
1018}
[26206]1019
[10212]1020sub envvar_prepend {
1021 my ($var,$val) = @_;
1022
[26206]1023 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
1024 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
1025
[16404]1026 # do not prepend any value/path that's already in the environment variable
[24832]1027
1028 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1029 if (!defined($ENV{$var})) {
1030 $ENV{$var} = "$val";
[16442]1031 }
[24832]1032 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]1033 $ENV{$var} = "$val".$pathsep.$ENV{$var};
[10212]1034 }
1035}
1036
1037sub envvar_append {
1038 my ($var,$val) = @_;
[26206]1039
1040 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
1041 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
[24832]1042
[16404]1043 # do not append any value/path that's already in the environment variable
[26206]1044
[24832]1045 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1046 if (!defined($ENV{$var})) {
1047 $ENV{$var} = "$val";
[16442]1048 }
[24832]1049 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]1050 $ENV{$var} = $ENV{$var}.$pathsep."$val";
[24832]1051 }
[10212]1052}
1053
[16442]1054
[16380]1055# splits a filename into a prefix and a tail extension using the tail_re, or
1056# if that fails, splits on the file_extension . (dot)
1057sub get_prefix_and_tail_by_regex {
[10212]1058
[16380]1059 my ($filename,$tail_re) = @_;
1060
1061 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
1062 if ((!defined $file_prefix) || (!defined $file_ext)) {
1063 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
1064 }
1065
1066 return ($file_prefix,$file_ext);
1067}
1068
1069# get full path and file only path from a base_dir (which may be empty) and
1070# file (which may contain directories)
1071sub get_full_filenames {
1072 my ($base_dir, $file) = @_;
1073
1074 my $filename_full_path = $file;
1075 # add on directory if present
1076 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
1077
1078 my $filename_no_path = $file;
1079
1080 # remove directory if present
1081 $filename_no_path =~ s/^.*[\/\\]//;
1082 return ($filename_full_path, $filename_no_path);
1083}
1084
[8682]1085# returns the path of a file without the filename -- ie. the directory the file is in
1086sub filename_head {
1087 my $filename = shift(@_);
1088
1089 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1090 $filename =~ s/[^\\\\]*$//;
1091 }
1092 else {
1093 $filename =~ s/[^\\\/]*$//;
1094 }
1095
1096 return $filename;
1097}
1098
1099
[23362]1100
[1454]1101# returns 1 if filename1 and filename2 point to the same
1102# file or directory
1103sub filenames_equal {
1104 my ($filename1, $filename2) = @_;
1105
1106 # use filename_cat to clean up trailing slashes and
1107 # multiple slashes
1108 $filename1 = filename_cat ($filename1);
[2516]1109 $filename2 = filename_cat ($filename2);
[1454]1110
1111 # filenames not case sensitive on windows
1112 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1113 $filename1 =~ tr/[A-Z]/[a-z]/;
1114 $filename2 =~ tr/[A-Z]/[a-z]/;
1115 }
1116 return 1 if $filename1 eq $filename2;
1117 return 0;
1118}
1119
[24932]1120# If filename is relative to within_dir, returns the relative path of filename to that directory
1121# with slashes in the filename returned as they were in the original (absolute) filename.
[23362]1122sub filename_within_directory
1123{
1124 my ($filename,$within_dir) = @_;
1125
[23371]1126 if ($within_dir !~ m/[\/\\]$/) {
1127 my $dirsep = &util::get_dirsep();
[23362]1128 $within_dir .= $dirsep;
1129 }
1130
[24829]1131 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
[23362]1132 if ($filename =~ m/^$within_dir(.*)$/) {
1133 $filename = $1;
1134 }
1135
1136 return $filename;
1137}
1138
[24932]1139# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
1140# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
1141# The subpath returned will also be a URL type filename.
1142sub filename_within_directory_url_format
1143{
1144 my ($filename,$within_dir) = @_;
1145
1146 # convert parameters only to / slashes if Windows
1147
[24971]1148 my $filename_urlformat = &filepath_to_url_format($filename);
1149 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
1150
[24932]1151 #if ($within_dir_urlformat !~ m/\/$/) {
1152 # make sure directory ends with a slash
1153 #$within_dir_urlformat .= "/";
1154 #}
1155
1156 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
1157
1158 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
1159
1160 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
1161 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
1162 $filename_urlformat = $1;
1163 }
1164
1165 return $filename_urlformat;
1166}
1167
[24971]1168# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
1169# since on Linux it doesn't represent a file separator but an escape char).
1170sub filepath_to_url_format
1171{
1172 my ($filepath) = @_;
1173 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1174 # Only need to worry about Windows, as Unix style directories already in url-format
1175 # Convert Windows style \ => /
1176 $filepath =~ s@\\@/@g;
1177 }
1178 return $filepath;
1179}
[24932]1180
[25093]1181# regex filepaths on windows may include \\ as path separator. Convert \\ to /
1182sub filepath_regex_to_url_format
1183{
1184 my ($filepath) = @_;
1185 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1186 # Only need to worry about Windows, as Unix style directories already in url-format
1187 # Convert Windows style \\ => /
1188 $filepath =~ s@\\\\@/@g;
1189 }
1190 return $filepath;
1191
1192}
[24971]1193
[25093]1194# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
1195# and ignores trailing /
1196# returns (file, dirs) dirs will be empty if no subdirs
1197sub url_fileparse
1198{
1199 my ($filepath) = @_;
1200 # remove trailing /
1201 $filepath =~ s@/$@@;
1202 if ($filepath !~ m@/@) {
1203 return ($filepath, "");
1204 }
1205 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
1206 return ($file, $dirs);
1207
1208}
1209
1210
[10281]1211sub filename_within_collection
1212{
1213 my ($filename) = @_;
1214
1215 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1216
1217 if (defined $collect_dir) {
[23362]1218
[15875]1219 # if from within GSDLCOLLECTDIR, then remove directory prefix
1220 # so source_filename is realative to it. This is done to aid
1221 # portability, i.e. the collection can be moved to somewhere
1222 # else on the file system and the archives directory will still
1223 # work. This is needed, for example in the applet version of
1224 # GLI where GSDLHOME/collect on the server will be different to
1225 # the collect directory of the remove user. Of course,
1226 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1227 # it back into a full pathname.
[23362]1228
1229 $filename = filename_within_directory($filename,$collect_dir);
[10281]1230 }
1231
1232 return $filename;
1233}
1234
[23362]1235sub prettyprint_file
1236{
[23484]1237 my ($base_dir,$file,$gli) = @_;
[23362]1238
1239 my $filename_full_path = &util::filename_cat($base_dir,$file);
1240
1241 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1242 require Win32;
1243
1244 # For some reason base_dir in the form c:/a/b/c
1245 # This leads to confusion later on, so turn it back into
1246 # the more usual Windows form
1247 $base_dir =~ s/\//\\/g;
1248 my $long_base_dir = Win32::GetLongPathName($base_dir);
1249 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1250
1251 $file = filename_within_directory($long_full_path,$long_base_dir);
[23484]1252 $file = encode("utf8",$file) if ($gli);
[23362]1253 }
1254
1255 return $file;
1256}
1257
1258
1259sub upgrade_if_dos_filename
1260{
[23371]1261 my ($filename_full_path,$and_encode) = @_;
[23362]1262
1263 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1264 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1265 # to its long (Windows) version
[23416]1266 my $long_filename = Win32::GetLongPathName($filename_full_path);
1267 if (defined $long_filename) {
1268 $filename_full_path = $long_filename;
1269 }
[23362]1270 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
[23483]1271 $filename_full_path =~ s/^(.):/\u$1:/;
[23371]1272 if ((defined $and_encode) && ($and_encode)) {
1273 $filename_full_path = encode("utf8",$filename_full_path);
1274 }
[23362]1275 }
1276
1277 return $filename_full_path;
1278}
1279
1280
[23388]1281sub downgrade_if_dos_filename
1282{
1283 my ($filename_full_path) = @_;
1284
1285 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1286 require Win32;
1287
1288 # Ensure the given long Windows filename is in a form that can
1289 # be opened by Perl => convert it to a short DOS-like filename
1290
[23414]1291 my $short_filename = Win32::GetShortPathName($filename_full_path);
1292 if (defined $short_filename) {
1293 $filename_full_path = $short_filename;
1294 }
[23416]1295 # Make sure initial drive letter is lower-case (to fit in
1296 # with rest of Greenstone)
[23483]1297 $filename_full_path =~ s/^(.):/\u$1:/;
[23388]1298 }
1299
1300 return $filename_full_path;
1301}
1302
[23561]1303sub block_filename
1304{
1305 my ($block_hash,$filename) = @_;
1306
1307 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1308
1309 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1310 my $lower_filename = lc($filename);
1311 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1312# my $lower_drive = $filename;
1313# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1314
1315# my $upper_drive = $filename;
1316# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1317#
1318# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1319# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
1320 }
1321 else {
1322 $block_hash->{'file_blocks'}->{$filename} = 1;
1323 }
1324}
[23388]1325
[23561]1326
[18441]1327sub filename_is_absolute
1328{
1329 my ($filename) = @_;
1330
1331 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1332 return ($filename =~ m/^(\w:)?\\/);
1333 }
1334 else {
1335 return ($filename =~ m/^\//);
1336 }
1337}
1338
1339
[17572]1340## @method make_absolute()
1341#
1342# Ensure the given file path is absolute in respect to the given base path.
1343#
1344# @param $base_dir A string denoting the base path the given dir must be
1345# absolute to.
1346# @param $dir The directory to be made absolute as a string. Note that the
1347# dir may already be absolute, in which case it will remain
1348# unchanged.
1349# @return The now absolute form of the directory as a string.
1350#
1351# @author John Thompson, DL Consulting Ltd.
1352# @copy 2006 DL Consulting Ltd.
1353#
1354#used in buildcol.pl, doesn't work for all cases --kjdon
1355sub make_absolute {
1356
1357 my ($base_dir, $dir) = @_;
[18441]1358### print STDERR "dir = $dir\n";
[17572]1359 $dir =~ s/[\\\/]+/\//g;
1360 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1361 $dir =~ s|^/tmp_mnt||;
1362 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1363 $dir =~ s|/[.][.]?/|/|g;
1364 $dir =~ tr|/|/|s;
[18441]1365### print STDERR "dir = $dir\n";
[17572]1366
1367 return $dir;
1368}
1369## make_absolute() ##
[10281]1370
[7929]1371sub get_dirsep {
1372
1373 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1374 return "\\";
1375 } else {
1376 return "\/";
1377 }
1378}
1379
[619]1380sub get_os_dirsep {
[4]1381
[619]1382 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1383 return "\\\\";
1384 } else {
1385 return "\\\/";
1386 }
1387}
1388
1389sub get_re_dirsep {
1390
1391 return "\\\\|\\\/";
1392}
1393
1394
[15003]1395sub get_dirsep_tail {
1396 my ($filename) = @_;
1397
1398 # returns last part of directory or filename
1399 # On unix e.g. a/b.d => b.d
1400 # a/b/c => c
1401
[15088]1402 my $dirsep = get_re_dirsep();
1403 my @dirs = split (/$dirsep/, $filename);
1404 my $tail = pop @dirs;
[15003]1405
[15088]1406 # - caused problems under windows
1407 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1408
[15003]1409 return $tail;
1410}
1411
1412
[4]1413# if this is running on windows we want binaries to end in
1414# .exe, otherwise they don't have to end in any extension
1415sub get_os_exe {
1416 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1417 return "";
1418}
1419
1420
[86]1421# test to see whether this is a big or little endian machine
[15713]1422sub is_little_endian
1423{
1424 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1425 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1426 # Otherwise, it's little endian
1427
1428 #return 0 if $^O =~ /^darwin$/i;
[17714]1429 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1430
1431 # Going back to stating exactly whether the machine is little endian
1432 # or big endian, without any special case for Macs. Since for rata it comes
1433 # back with little endian and for shuttle with bigendian.
[15713]1434 return (ord(substr(pack("s",1), 0, 1)) == 1);
[86]1435}
[4]1436
[86]1437
[135]1438# will return the collection name if successful, "" otherwise
1439sub use_collection {
[1454]1440 my ($collection, $collectdir) = @_;
[135]1441
[1454]1442 if (!defined $collectdir || $collectdir eq "") {
1443 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1444 }
1445
[135]1446 # get and check the collection
1447 if (!defined($collection) || $collection eq "") {
1448 if (defined $ENV{'GSDLCOLLECTION'}) {
1449 $collection = $ENV{'GSDLCOLLECTION'};
1450 } else {
[2359]1451 print STDOUT "No collection specified\n";
[135]1452 return "";
1453 }
1454 }
1455
1456 if ($collection eq "modelcol") {
[2359]1457 print STDOUT "You can't use modelcol.\n";
[135]1458 return "";
1459 }
1460
1461 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1462 # are defined
[17204]1463 $ENV{'GSDLCOLLECTION'} = $collection;
[1454]1464 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
[135]1465
1466 # make sure this collection exists
1467 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
[2359]1468 print STDOUT "Invalid collection ($collection).\n";
[135]1469 return "";
1470 }
1471
1472 # everything is ready to go
1473 return $collection;
1474}
1475
[21207]1476sub get_current_collection_name {
1477 return $ENV{'GSDLCOLLECTION'};
1478}
[14926]1479
1480
1481# will return the collection name if successful, "" otherwise.
1482# Like use_collection (above) but for greenstone 3 (taking account of site level)
1483
1484sub use_site_collection {
1485 my ($site, $collection, $collectdir) = @_;
1486
1487 if (!defined $collectdir || $collectdir eq "") {
1488 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1489 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1490 }
1491
1492 # collectdir explicitly set by this point (using $site variable if required).
1493 # Can call "old" gsdl2 use_collection now.
1494
1495 return use_collection($collection,$collectdir);
1496}
1497
1498
1499
[15018]1500sub locate_config_file
1501{
1502 my ($file) = @_;
1503
1504 my $locations = locate_config_files($file);
1505
1506 return shift @$locations; # returns undef if 'locations' is empty
1507}
1508
1509
1510sub locate_config_files
1511{
1512 my ($file) = @_;
1513
1514 my @locations = ();
1515
1516 if (-e $file) {
1517 # Clearly specified (most likely full filename)
1518 # No need to hunt in 'etc' directories, return value unchanged
1519 push(@locations,$file);
1520 }
1521 else {
1522 # Check for collection specific one before looking in global GSDL 'etc'
[16969]1523 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1524 my $test_collect_etc_filename
1525 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1526
1527 if (-e $test_collect_etc_filename) {
1528 push(@locations,$test_collect_etc_filename);
1529 }
[15018]1530 }
1531 my $test_main_etc_filename
1532 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1533 if (-e $test_main_etc_filename) {
1534 push(@locations,$test_main_etc_filename);
1535 }
1536 }
1537
1538 return \@locations;
1539}
1540
1541
[9955]1542sub hyperlink_text
1543{
1544 my ($text) = @_;
1545
1546 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1547 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1548
1549 return $text;
1550}
1551
1552
[16436]1553# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1554# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1555sub is_dir_empty
1556{
1557 my ($path) = @_;
1558 opendir DIR, $path;
1559 while(my $entry = readdir DIR) {
1560 next if($entry =~ /^\.\.?$/);
1561 closedir DIR;
1562 return 0;
1563 }
1564 closedir DIR;
1565 return 1;
1566}
1567
[18337]1568# Returns the given filename converted using either URL encoding or base64
1569# encoding, as specified by $rename_method. If the given filename has no suffix
[20413]1570# (if it is just the tailname), then $no_suffix should be some defined value.
1571# rename_method can be url, none, base64
[18319]1572sub rename_file {
[18337]1573 my ($filename, $rename_method, $no_suffix) = @_;
[18329]1574
[18337]1575 if(!$filename) { # undefined or empty string
[18329]1576 return $filename;
1577 }
[18319]1578
[20413]1579 if (!$rename_method) {
1580 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1581 # Debugging information
[22856]1582 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1583 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
[20413]1584 $rename_method = "url";
1585 } elsif($rename_method eq "none") {
1586 return $filename; # would have already been renamed
1587 }
1588
[19762]1589 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1590 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1591 ###$filename =~ s/ /_/g;
[18337]1592
1593 my ($tailname,$dirname,$suffix);
1594 if($no_suffix) { # given a tailname, no suffix
1595 ($tailname,$dirname) = File::Basename::fileparse($filename);
1596 }
1597 else {
1598 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1599 }
[23388]1600 if (!$suffix) {
1601 $suffix = "";
1602 }
1603 else {
1604 $suffix = lc($suffix);
1605 }
[18337]1606
[20413]1607 if ($rename_method eq "url") {
[18319]1608 $tailname = &unicode::url_encode($tailname);
1609 }
1610 elsif ($rename_method eq "base64") {
[18341]1611 $tailname = &unicode::base64_encode($tailname);
[18319]1612 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1613 }
[18326]1614
[18319]1615 $filename = "$tailname$suffix";
[18326]1616 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
[18319]1617
1618 return $filename;
1619}
1620
[21616]1621
1622# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
[21664]1623sub rename_ldb_or_bdb_file {
[18657]1624 my ($filename_no_ext) = @_;
1625
1626 my $new_filename = "$filename_no_ext.gdb";
[21615]1627 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
[18657]1628 # try ldb
1629 my $old_filename = "$filename_no_ext.ldb";
1630
1631 if (-f $old_filename) {
[19056]1632 print STDERR "Renaming $old_filename to $new_filename\n";
1633 rename ($old_filename, $new_filename)
1634 || print STDERR "Rename failed: $!\n";
[18657]1635 return;
1636 }
1637 # try bdb
1638 $old_filename = "$filename_no_ext.bdb";
1639 if (-f $old_filename) {
[19056]1640 print STDERR "Renaming $old_filename to $new_filename\n";
1641 rename ($old_filename, $new_filename)
1642 || print STDERR "Rename failed: $!\n";
[18657]1643 return;
1644 }
1645}
1646
[24874]1647sub os_dir() {
1648
1649 my $gsdlarch = "";
1650 if(defined $ENV{'GSDLARCH'}) {
1651 $gsdlarch = $ENV{'GSDLARCH'};
1652 }
1653 return $ENV{'GSDLOS'}.$gsdlarch;
1654}
[18657]1655
[21719]1656# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1657# By default, /greenstone3 for GS3 or /greenstone for GS2.
1658sub get_greenstone_url_prefix() {
1659 # if already set on a previous occasion, just return that
1660 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1661 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
[18657]1662
[21719]1663 my ($configfile, $urlprefix, $defaultUrlprefix);
1664 my @propertynames = ();
1665
1666 if($ENV{'GSDL3SRCHOME'}) {
1667 $defaultUrlprefix = "/greenstone3";
1668 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1669 push(@propertynames, qw/path\s*\=/);
1670 } else {
1671 $defaultUrlprefix = "/greenstone";
[24874]1672 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
[21719]1673 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1674 }
1675
1676 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1677
1678 if(!$urlprefix) { # no values found for URL prefix, use default values
1679 $urlprefix = $defaultUrlprefix;
1680 } else {
1681 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1682 $urlprefix =~ s/^\///; # remove the starting slash
1683 my @dirs = split(/(\\|\/)/, $urlprefix);
1684 $urlprefix = shift(@dirs);
1685
1686 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1687 $urlprefix = "/$urlprefix";
1688 }
1689 }
1690
1691 # set for the future
1692 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1693# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1694 return $urlprefix;
1695}
1696
1697
1698# Given a config file (xml or java properties file) and a list/array of regular expressions
1699# that represent property names to match on, this function will return the value for the 1st
1700# matching property name. If the return value is undefined, no matching property was found.
1701sub extract_propvalue_from_file() {
1702 my ($configfile, $propertynames) = @_;
1703
1704 my $value;
1705 unless(open(FIN, "<$configfile")) {
1706 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1707 return $value; # not initialised
1708 }
1709
1710 # Read the entire file at once, as one single line, then close it
1711 my $filecontents;
1712 {
1713 local $/ = undef;
1714 $filecontents = <FIN>;
1715 }
1716 close(FIN);
1717
1718 foreach my $regex (@$propertynames) {
1719 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1720 if($value) {
1721 $value =~ s/^\"//; # remove any startquotes
1722 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1723 last; # found value for a matching property, break from loop
1724 }
1725 }
1726
1727 return $value;
1728}
1729
[23306]1730# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1731# given that perllib is in @INC in order to invoke this subroutine.
1732# Call as follows -- after setting up INC to include perllib and
1733# after setting up GSDLHOME and GSDLOS:
1734#
1735# require util;
1736# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1737#
1738sub setup_greenstone_env() {
1739 my ($GSDLHOME, $GSDLOS) = @_;
1740
1741 #my %env_map = ();
1742 # Get the localised ENV settings of running a localised source setup.bash
[23314]1743 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1744 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1745 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
[23306]1746 if($GSDLOS =~ m/windows/i) {
[23314]1747 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1748 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
[23306]1749 }
1750 if (!open(PIN, "$perl_command |")) {
1751 print STDERR ("Unable to execute command: $perl_command. $!\n");
[24563]1752 }
[23306]1753
1754 while (defined (my $perl_output_line = <PIN>)) {
1755 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1756 #$env_map{$key}=$value;
1757 $ENV{$key}=$value;
1758 }
[24563]1759 close (PIN);
1760
[23306]1761 # If any keys in $ENV don't occur in Greenstone's localised env
1762 # (stored in $env_map), delete those entries from $ENV
1763 #foreach $key (keys %ENV) {
1764 # if(!defined $env_map{$key}) {
1765 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1766 # delete $ENV{$key}; # del $ENV(key, value) pair
1767 # }
1768 #}
1769 #undef %env_map;
1770}
1771
[24362]1772sub get_perl_exec() {
1773 my $perl_exec = $^X; # may return just "perl"
1774
1775 if($ENV{'PERLPATH'}) {
1776 # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1777 if($ENV{'GSDLOS'} =~ m/windows/) {
1778 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1779 } else {
1780 $perl_exec = "$ENV{'PERLPATH'}/perl";
1781 }
1782 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1783 # containing the full path to the current perl executable we're using
1784 $perl_exec = $Config{perlpath}; # configured path for perl
1785 if (!-e $perl_exec) { # may not point to location on this machine
1786 $perl_exec = $^X; # may return just "perl"
1787 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1788 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1789 }
1790 }
1791 }
1792
1793 return $perl_exec;
1794}
1795
[25533]1796# returns the path to the java command in the JRE included with GS (if any),
1797# quoted to safeguard any spaces in this path, otherwise a simple java
1798# command is returned which assumes and will try for a system java.
[25512]1799sub get_java_command {
1800 my $java = "java";
1801 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1802 # after running setup.bat or from GLI which also runs setup.bat
1803 my $java_bin = &util::filename_cat($ENV{'GSDLHOME'},"packages","jre","bin");
1804 if(-d $java_bin) {
1805 $java = &util::filename_cat($java_bin,"java");
[25533]1806 $java = "\"".$java."\""; # quoted to preserve spaces in path
[25512]1807 }
1808 }
1809 return $java;
1810}
[24362]1811
[25512]1812
[25577]1813# Given the qualified collection name (colgroup/collection),
1814# returns the collection and colgroup parts
1815sub get_collection_parts {
1816 # http://perldoc.perl.org/File/Basename.html
1817 # my($filename, $directories, $suffix) = fileparse($path);
1818 # "$directories contains everything up to and including the last directory separator in the $path
1819 # including the volume (if applicable). The remainder of the $path is the $filename."
1820 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);
1821
1822 my $qualified_collection = shift(@_);
1823
1824 # Since activate.pl can be launched from the command-line, including by a user,
1825 # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1826 # Also allow for the accidental inclusion of multiple slashes
1827 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1828
1829 if(!defined $collection) {
1830 $collection = $colgroup;
1831 $colgroup = "";
1832 }
1833 return ($collection, $colgroup);
1834}
1835
1836# work out the "collectdir/collection" location
1837sub resolve_collection_dir {
1838 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1839
1840 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);
1841
1842 if (defined $collect_dir) {
1843 return &util::filename_cat($collect_dir,$colgroup, $collection);
1844 }
[25796]1845 elsif (defined($ENV{'GSDLCOLLECTDIR'})) {
1846 return $ENV{'GSDLCOLLECTDIR'};
1847 }
[25577]1848 else {
1849 if (defined $site) {
1850 return &util::filename_cat($ENV{'GSDL3HOME'},"sites",$site,"collect",$colgroup, $collection);
1851 }
1852 else {
1853 return &util::filename_cat($ENV{'GSDLHOME'},"collect",$colgroup, $collection);
1854 }
1855 }
1856}
1857
[25994]1858# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
1859# a directory containing sequentially numbered images.
1860sub create_itemfile
1861{
1862 my ($output_dir, $convert_basename, $convert_to) = @_;
1863 opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";
1864
1865 my $page_num = "";
1866 my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
1867
1868 # Sort files in the directory by page_num
1869 sub page_number {
1870 my ($dir) = @_;
1871 my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
1872
1873 $pagenum = 1 unless defined $pagenum;
1874 return $pagenum;
1875 }
1876
1877 # sort the files in the directory in the order of page_num rather than lexically.
1878 @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
1879
1880 # work out if the numbering of the now sorted image files starts at 0 or not
1881 # by checking the number of the first _image_ file (skipping item files)
1882 my $starts_at_0 = 0;
1883 my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
1884 if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
1885 $starts_at_0 = 1;
1886 }
1887
1888 my $item_file = &util::filename_cat($output_dir, $convert_basename.".item");
1889 open(FILE,">$item_file");
1890 print FILE "<PagedDocument>\n";
1891
1892 foreach my $file (@dir_files){
1893 if ($file !~ /\.item/i){
1894 $page_num = page_number($file);
1895 $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
1896 print FILE " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
1897 }
1898 }
1899
1900 print FILE "</PagedDocument>\n";
[26017]1901 close FILE;
[25994]1902 closedir DIR;
1903 return $item_file;
1904}
1905
[4]19061;
Note: See TracBrowser for help on using the repository browser.