source: main/trunk/greenstone2/perllib/util.pm@ 26206

Last change on this file since 26206 was 26206, checked in by ak19, 12 years ago

Fixes to get Remote Greenstone 3 working with client-gli: 1. client-GLI should not start the local GS3 server, since client-GLI will be running against a remote server. 2. The encryption process for authentication had been changed for GS3, so now Authentication.java has a main function which is invoked by gliserver's gsdlCGI.pm to encrypt the password. 4. UsersDB when converted to txt for parsing by gliserver.pl has a different structure, so gliserver.pl needs to take that into account. 5. util.pm's functions for prepending and appending to environment variables needs to use an OS dependant path separator. This was not noticed when testing the remote GS server on 32 bit linux so far, but the windows style path separator (semicolon) used so far didn't work on the 64 bit linux test machine.

  • Property svn:keywords set to Author Date Id Revision
File size: 54.7 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41 my (@files) = @_;
42
43 my @filefiles = ();
44
45 # make sure the files we want to delete exist
46 # and are regular files
47 foreach my $file (@files) {
48 if (!-e $file) {
49 print STDERR "util::rm $file does not exist\n";
50 } elsif ((!-f $file) && (!-l $file)) {
51 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52 } else {
53 push (@filefiles, $file);
54 }
55 }
56
57 # remove the files
58 my $numremoved = unlink @filefiles;
59
60 # check to make sure all of them were removed
61 if ($numremoved != scalar(@filefiles)) {
62 print STDERR "util::rm Not all files were removed\n";
63 }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine when debugging the deletion of individual files.
68sub rm_debug {
69 my (@files) = @_;
70 my @filefiles = ();
71
72 # make sure the files we want to delete exist
73 # and are regular files
74 foreach my $file (@files) {
75 if (!-e $file) {
76 print STDERR "util::rm $file does not exist\n";
77 } elsif ((!-f $file) && (!-l $file)) {
78 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79 } else { # debug message
80 unlink($file) or warn "Could not delete file $file: $!\n";
81 }
82 }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88 my ($files,$file_accept_re,$file_reject_re) = @_;
89
90# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92# print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93
94 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96 # recursively remove the files
97 foreach my $file (@files_array) {
98 $file =~ s/[\/\\]+$//; # remove trailing slashes
99
100 if (!-e $file) {
101 print STDERR "util::filtered_rm_r $file does not exist\n";
102
103 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104 # get the contents of this directory
105 if (!opendir (INDIR, $file)) {
106 print STDERR "util::filtered_rm_r could not open directory $file\n";
107 } else {
108 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109 closedir (INDIR);
110
111 # remove all the files in this directory
112 map {$_="$file/$_";} @filedir;
113 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115 if (!defined $file_accept_re && !defined $file_reject_re) {
116 # remove this directory
117 if (!rmdir $file) {
118 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119 }
120 }
121 }
122 } else {
123 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126 # remove this file
127 &rm ($file);
128 }
129 }
130 }
131}
132
133
134# recursive removal
135sub rm_r {
136 my (@files) = @_;
137
138 # use the more general (but reterospectively written function
139 # filtered_rm_r function()
140
141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149 my $dest = pop (@_);
150 my (@srcfiles) = @_;
151
152 # remove trailing slashes from source and destination files
153 $dest =~ s/[\\\/]+$//;
154 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156 # a few sanity checks
157 if (scalar (@srcfiles) == 0) {
158 print STDERR "util::mv no destination directory given\n";
159 return;
160 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161 print STDERR "util::mv if multiple source files are given the ".
162 "destination must be a directory\n";
163 return;
164 }
165
166 # move the files
167 foreach my $file (@srcfiles) {
168 my $tempdest = $dest;
169 if (-d $tempdest) {
170 my ($filename) = $file =~ /([^\\\/]+)$/;
171 $tempdest .= "/$filename";
172 }
173 if (!-e $file) {
174 print STDERR "util::mv $file does not exist\n";
175 } else {
176 if(!rename ($file, $tempdest)) {
177 print STDERR "**** Failed to rename $file to $tempdest\n";
178 &File::Copy::copy($file, $tempdest);
179 &rm($file);
180 }
181 elsif(-e $file) { # rename (partially) succeeded) but srcfile still exists after rename
182 #print STDERR "*** srcfile $file still exists after rename to $tempdest\n";
183 if(!-e $tempdest) {
184 print STDERR "@@@@ ERROR: $tempdest does not exist\n";
185 }
186 # Sometimes the rename operation fails (as does File::Copy::move).
187 # This turns out to be because the files are hardlinked.
188 # Need to do a copy-delete in this case, however, the copy step is not necessary:
189 # the srcfile got renamed into tempdest, but srcfile itself still exists, delete it.
190 #&File::Copy::copy($file, $tempdest);
191
192 &rm($file);
193 }
194 }
195 }
196}
197
198# Move the contents of source directory into target directory
199# (as opposed to merely replacing target dir with the src dir)
200# This can overwrite any files with duplicate names in the target
201# but other files and folders in the target will continue to exist
202sub mv_dir_contents {
203 my ($src_dir, $dest_dir) = @_;
204
205 # Obtain listing of all files within src_dir
206 # Note that readdir lists relative paths, as well as . and ..
207 opendir(DIR, "$src_dir");
208 my @files= readdir(DIR);
209 close(DIR);
210
211 my @full_path_files = ();
212 foreach my $file (@files) {
213 # process all except . and ..
214 unless($file eq "." || $file eq "..") {
215
216 my $dest_subdir = &filename_cat($dest_dir, $file); # $file is still a relative path
217
218 # construct absolute paths
219 $file = &filename_cat($src_dir, $file); # $file is now an absolute path
220
221 # Recurse on directories which have an equivalent in target dest_dir
222 # If $file is a directory that already exists in target $dest_dir,
223 # then a simple move operation will fail (definitely on Windows).
224 if(-d $file && -d $dest_subdir) {
225 #print STDERR "**** $file is a directory also existing in target, its contents to be copied to $dest_subdir\n";
226 &mv_dir_contents($file, $dest_subdir);
227
228 # now all content is moved across, delete empty dir in source folder
229 if(&is_dir_empty($file)) {
230 if (!rmdir $file) {
231 print STDERR "ERROR. util::mv_dir_contents couldn't remove directory $file\n";
232 }
233 } else { # error
234 print STDERR "ERROR. util::mv_dir_contents: subfolder $file still non-empty after moving contents to $dest_subdir\n";
235 }
236 } else { # process files and any directories that don't already exist with a simple move
237 push(@full_path_files, $file);
238 }
239 }
240 }
241
242 if(!&dir_exists($dest_dir)) { # create target toplevel folder or subfolders if they don't exist
243 &mk_dir($dest_dir);
244 }
245
246 #print STDERR "@@@@@ Copying files |".join(",", @full_path_files)."| to: $dest_dir\n";
247
248 if(@full_path_files) { # if non-empty, there's something to copy across
249 &mv(@full_path_files, $dest_dir);
250 }
251}
252
253
254# copies a file or a group of files
255sub cp {
256 my $dest = pop (@_);
257 my (@srcfiles) = @_;
258
259 # remove trailing slashes from source and destination files
260 $dest =~ s/[\\\/]+$//;
261 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
262
263 # a few sanity checks
264 if (scalar (@srcfiles) == 0) {
265 print STDERR "util::cp no destination directory given\n";
266 return;
267 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
268 print STDERR "util::cp if multiple source files are given the ".
269 "destination must be a directory\n";
270 return;
271 }
272
273 # copy the files
274 foreach my $file (@srcfiles) {
275 my $tempdest = $dest;
276 if (-d $tempdest) {
277 my ($filename) = $file =~ /([^\\\/]+)$/;
278 $tempdest .= "/$filename";
279 }
280 if (!-e $file) {
281 print STDERR "util::cp $file does not exist\n";
282 } elsif (!-f $file) {
283 print STDERR "util::cp $file is not a plain file\n";
284 } else {
285 &File::Copy::copy ($file, $tempdest);
286 }
287 }
288}
289
290
291
292# recursively copies a file or group of files
293# syntax: cp_r (sourcefiles, destination directory)
294# destination must be a directory - to copy one file to
295# another use cp instead
296sub cp_r {
297 my $dest = pop (@_);
298 my (@srcfiles) = @_;
299
300 # a few sanity checks
301 if (scalar (@srcfiles) == 0) {
302 print STDERR "util::cp_r no destination directory given\n";
303 return;
304 } elsif (-f $dest) {
305 print STDERR "util::cp_r destination must be a directory\n";
306 return;
307 }
308
309 # create destination directory if it doesn't exist already
310 if (! -d $dest) {
311 my $store_umask = umask(0002);
312 mkdir ($dest, 0777);
313 umask($store_umask);
314 }
315
316 # copy the files
317 foreach my $file (@srcfiles) {
318
319 if (!-e $file) {
320 print STDERR "util::cp_r $file does not exist\n";
321
322 } elsif (-d $file) {
323 # make the new directory
324 my ($filename) = $file =~ /([^\\\/]*)$/;
325 $dest = &util::filename_cat ($dest, $filename);
326 my $store_umask = umask(0002);
327 mkdir ($dest, 0777);
328 umask($store_umask);
329
330 # get the contents of this directory
331 if (!opendir (INDIR, $file)) {
332 print STDERR "util::cp_r could not open directory $file\n";
333 } else {
334 my @filedir = readdir (INDIR);
335 closedir (INDIR);
336 foreach my $f (@filedir) {
337 next if $f =~ /^\.\.?$/;
338 # copy all the files in this directory
339 my $ff = &util::filename_cat ($file, $f);
340 &cp_r ($ff, $dest);
341 }
342 }
343
344 } else {
345 &cp($file, $dest);
346 }
347 }
348}
349# recursively copies a file or group of files
350# syntax: cp_r (sourcefiles, destination directory)
351# destination must be a directory - to copy one file to
352# another use cp instead
353sub cp_r_nosvn {
354 my $dest = pop (@_);
355 my (@srcfiles) = @_;
356
357 # a few sanity checks
358 if (scalar (@srcfiles) == 0) {
359 print STDERR "util::cp_r no destination directory given\n";
360 return;
361 } elsif (-f $dest) {
362 print STDERR "util::cp_r destination must be a directory\n";
363 return;
364 }
365
366 # create destination directory if it doesn't exist already
367 if (! -d $dest) {
368 my $store_umask = umask(0002);
369 mkdir ($dest, 0777);
370 umask($store_umask);
371 }
372
373 # copy the files
374 foreach my $file (@srcfiles) {
375
376 if (!-e $file) {
377 print STDERR "util::cp_r $file does not exist\n";
378
379 } elsif (-d $file) {
380 # make the new directory
381 my ($filename) = $file =~ /([^\\\/]*)$/;
382 $dest = &util::filename_cat ($dest, $filename);
383 my $store_umask = umask(0002);
384 mkdir ($dest, 0777);
385 umask($store_umask);
386
387 # get the contents of this directory
388 if (!opendir (INDIR, $file)) {
389 print STDERR "util::cp_r could not open directory $file\n";
390 } else {
391 my @filedir = readdir (INDIR);
392 closedir (INDIR);
393 foreach my $f (@filedir) {
394 next if $f =~ /^\.\.?$/;
395 next if $f =~ /^\.svn$/;
396 # copy all the files in this directory
397 my $ff = &util::filename_cat ($file, $f);
398 &cp_r ($ff, $dest);
399 }
400 }
401
402 } else {
403 &cp($file, $dest);
404 }
405 }
406}
407
408# copies a directory and its contents, excluding subdirectories, into a new directory
409sub cp_r_toplevel {
410 my $dest = pop (@_);
411 my (@srcfiles) = @_;
412
413 # a few sanity checks
414 if (scalar (@srcfiles) == 0) {
415 print STDERR "util::cp_r no destination directory given\n";
416 return;
417 } elsif (-f $dest) {
418 print STDERR "util::cp_r destination must be a directory\n";
419 return;
420 }
421
422 # create destination directory if it doesn't exist already
423 if (! -d $dest) {
424 my $store_umask = umask(0002);
425 mkdir ($dest, 0777);
426 umask($store_umask);
427 }
428
429 # copy the files
430 foreach my $file (@srcfiles) {
431
432 if (!-e $file) {
433 print STDERR "util::cp_r $file does not exist\n";
434
435 } elsif (-d $file) {
436 # make the new directory
437 my ($filename) = $file =~ /([^\\\/]*)$/;
438 $dest = &util::filename_cat ($dest, $filename);
439 my $store_umask = umask(0002);
440 mkdir ($dest, 0777);
441 umask($store_umask);
442
443 # get the contents of this directory
444 if (!opendir (INDIR, $file)) {
445 print STDERR "util::cp_r could not open directory $file\n";
446 } else {
447 my @filedir = readdir (INDIR);
448 closedir (INDIR);
449 foreach my $f (@filedir) {
450 next if $f =~ /^\.\.?$/;
451
452 # copy all the files in this directory, but not directories
453 my $ff = &util::filename_cat ($file, $f);
454 if (-f $ff) {
455 &cp($ff, $dest);
456 #&cp_r ($ff, $dest);
457 }
458 }
459 }
460
461 } else {
462 &cp($file, $dest);
463 }
464 }
465}
466
467sub mk_dir {
468 my ($dir) = @_;
469
470 my $store_umask = umask(0002);
471 my $mkdir_ok = mkdir ($dir, 0777);
472 umask($store_umask);
473
474 if (!$mkdir_ok)
475 {
476 print STDERR "util::mk_dir could not create directory $dir\n";
477 return;
478 }
479}
480
481# in case anyone cares - I did some testing (using perls Benchmark module)
482# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
483# slightly faster (surprisingly) - Stefan.
484sub mk_all_dir {
485 my ($dir) = @_;
486
487 # use / for the directory separator, remove duplicate and
488 # trailing slashes
489 $dir=~s/[\\\/]+/\//g;
490 $dir=~s/[\\\/]+$//;
491
492 # make sure the cache directory exists
493 my $dirsofar = "";
494 my $first = 1;
495 foreach my $dirname (split ("/", $dir)) {
496 $dirsofar .= "/" unless $first;
497 $first = 0;
498
499 $dirsofar .= $dirname;
500
501 next if $dirname =~ /^(|[a-z]:)$/i;
502 if (!-e $dirsofar)
503 {
504 my $store_umask = umask(0002);
505 my $mkdir_ok = mkdir ($dirsofar, 0777);
506 umask($store_umask);
507 if (!$mkdir_ok)
508 {
509 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
510 return;
511 }
512 }
513 }
514}
515
516# make hard link to file if supported by OS, otherwise copy the file
517sub hard_link {
518 my ($src, $dest, $verbosity) = @_;
519
520 # remove trailing slashes from source and destination files
521 $src =~ s/[\\\/]+$//;
522 $dest =~ s/[\\\/]+$//;
523
524## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
525 # a few sanity checks
526 if (-e $dest) {
527 # destination file already exists
528 return;
529 }
530 elsif (!-e $src) {
531 print STDERR "util::hard_link source file \"$src\" does not exist\n";
532 return 1;
533 }
534 elsif (-d $src) {
535 print STDERR "util::hard_link source \"$src\" is a directory\n";
536 return 1;
537 }
538
539 my $dest_dir = &File::Basename::dirname($dest);
540 mk_all_dir($dest_dir) if (!-e $dest_dir);
541
542
543 if (!link($src, $dest)) {
544 if ((!defined $verbosity) || ($verbosity>2)) {
545 print STDERR "util::hard_link: unable to create hard link. ";
546 print STDERR " Copying file: $src -> $dest\n";
547 }
548 &File::Copy::copy ($src, $dest);
549 }
550 return 0;
551}
552
553# make soft link to file if supported by OS, otherwise copy file
554sub soft_link {
555 my ($src, $dest, $ensure_paths_absolute) = @_;
556
557 # remove trailing slashes from source and destination files
558 $src =~ s/[\\\/]+$//;
559 $dest =~ s/[\\\/]+$//;
560
561 # Ensure file paths are absolute IF requested to do so
562 # Soft_linking didn't work for relative paths
563 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
564 # We need to ensure that the src file is the absolute path
565 # See http://perldoc.perl.org/File/Spec.html
566 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
567 $src = File::Spec->rel2abs($src); # make absolute
568 }
569 # Might as well ensure that the destination file's absolute path is used
570 if(!File::Spec->file_name_is_absolute( $dest )) {
571 $dest = File::Spec->rel2abs($dest); # make absolute
572 }
573 }
574
575 # a few sanity checks
576 if (!-e $src) {
577 print STDERR "util::soft_link source file $src does not exist\n";
578 return 0;
579 }
580
581 my $dest_dir = &File::Basename::dirname($dest);
582 mk_all_dir($dest_dir) if (!-e $dest_dir);
583
584 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
585
586 # symlink not supported on windows
587 &File::Copy::copy ($src, $dest);
588
589 } elsif (!eval {symlink($src, $dest)}) {
590 print STDERR "util::soft_link: unable to create soft link.\n";
591 return 0;
592 }
593
594 return 1;
595}
596
597# Primarily for filenames generated by processing
598# content of HTML files (which are mapped to UTF-8 internally)
599#
600# To turn this into an octet string that really exists on the file
601# system:
602# 1. don't need to do anything special for Unix-based systems
603# (as underlying file system is byte-code)
604# 2. need to map to short DOS filenames for Windows
605
606sub utf8_to_real_filename
607{
608 my ($utf8_filename) = @_;
609
610 my $real_filename;
611
612 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
613 require Win32;
614
615 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
616
617 my $unicode_filename = decode("utf8",$utf8_filename);
618 $real_filename = Win32::GetShortPathName($unicode_filename);
619 }
620 else {
621 $real_filename = $utf8_filename;
622 }
623
624 return $real_filename;
625}
626
627
628sub fd_exists
629{
630 my $filename_full_path = shift @_;
631 my $test_op = shift @_ || "-e";
632
633 # By default tests for existance of file or directory (-e)
634 # Can be made more specific by providing second parameter (e.g. -f or -d)
635
636 my $exists = 0;
637
638 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
639 require Win32;
640 my $filename_short_path = Win32::GetShortPathName($filename_full_path);
641 if (!defined $filename_short_path) {
642 # Was probably still in UTF8 form (not what is needed on Windows)
643 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
644 if (defined $unicode_filename_full_path) {
645 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
646 }
647 }
648 $filename_full_path = $filename_short_path;
649 }
650
651 if (defined $filename_full_path) {
652 $exists = eval "($test_op \$filename_full_path)";
653 }
654
655 return $exists;
656}
657
658sub file_exists
659{
660 my ($filename_full_path) = @_;
661
662 return fd_exists($filename_full_path,"-f");
663}
664
665sub dir_exists
666{
667 my ($filename_full_path) = @_;
668
669 return fd_exists($filename_full_path,"-d");
670}
671
672
673
674# updates a copy of a directory in some other part of the filesystem
675# verbosity settings are: 0=low, 1=normal, 2=high
676# both $fromdir and $todir should be absolute paths
677sub cachedir {
678 my ($fromdir, $todir, $verbosity) = @_;
679 $verbosity = 1 unless defined $verbosity;
680
681 # use / for the directory separator, remove duplicate and
682 # trailing slashes
683 $fromdir=~s/[\\\/]+/\//g;
684 $fromdir=~s/[\\\/]+$//;
685 $todir=~s/[\\\/]+/\//g;
686 $todir=~s/[\\\/]+$//;
687
688 &mk_all_dir ($todir);
689
690 # get the directories in ascending order
691 if (!opendir (FROMDIR, $fromdir)) {
692 print STDERR "util::cachedir could not read directory $fromdir\n";
693 return;
694 }
695 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
696 closedir (FROMDIR);
697
698 if (!opendir (TODIR, $todir)) {
699 print STDERR "util::cacedir could not read directory $todir\n";
700 return;
701 }
702 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
703 closedir (TODIR);
704
705 my $fromi = 0;
706 my $toi = 0;
707
708 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
709# print "fromi: $fromi toi: $toi\n";
710
711 # see if we should delete a file/directory
712 # this should happen if the file/directory
713 # is not in the from list or if its a different
714 # size, or has an older timestamp
715 if ($toi < scalar(@todir)) {
716 if (($fromi >= scalar(@fromdir)) ||
717 ($todir[$toi] lt $fromdir[$fromi] ||
718 ($todir[$toi] eq $fromdir[$fromi] &&
719 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
720 $verbosity)))) {
721
722 # the files are different
723 &rm_r("$todir/$todir[$toi]");
724 splice(@todir, $toi, 1); # $toi stays the same
725
726 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
727 # the files are the same
728 # if it is a directory, check its contents
729 if (-d "$todir/$todir[$toi]") {
730 &cachedir ("$fromdir/$fromdir[$fromi]",
731 "$todir/$todir[$toi]", $verbosity);
732 }
733
734 $toi++;
735 $fromi++;
736 next;
737 }
738 }
739
740 # see if we should insert a file/directory
741 # we should insert a file/directory if there
742 # is no tofiles left or if the tofile does not exist
743 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
744 $todir[$toi] gt $fromdir[$fromi])) {
745 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
746 splice (@todir, $toi, 0, $fromdir[$fromi]);
747
748 $toi++;
749 $fromi++;
750 }
751 }
752}
753
754# this function returns -1 if either file is not found
755# assumes that $file1 and $file2 are absolute file names or
756# in the current directory
757# $file2 is allowed to be newer than $file1
758sub differentfiles {
759 my ($file1, $file2, $verbosity) = @_;
760 $verbosity = 1 unless defined $verbosity;
761
762 $file1 =~ s/\/+$//;
763 $file2 =~ s/\/+$//;
764
765 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
766 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
767
768 return -1 unless (-e $file1 && -e $file2);
769 if ($file1name ne $file2name) {
770 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
771 return 1;
772 }
773
774 my @file1stat = stat ($file1);
775 my @file2stat = stat ($file2);
776
777 if (-d $file1) {
778 if (! -d $file2) {
779 print STDERR "one file is a directory\n" if ($verbosity >= 2);
780 return 1;
781 }
782 return 0;
783 }
784
785 # both must be regular files
786 unless (-f $file1 && -f $file2) {
787 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
788 return 1;
789 }
790
791 # the size of the files must be the same
792 if ($file1stat[7] != $file2stat[7]) {
793 print STDERR "different sized files\n" if ($verbosity >= 2);
794 return 1;
795 }
796
797 # the second file cannot be older than the first
798 if ($file1stat[9] > $file2stat[9]) {
799 print STDERR "file is older\n" if ($verbosity >= 2);
800 return 1;
801 }
802
803 return 0;
804}
805
806
807sub get_tmp_filename
808{
809 my $file_ext = shift(@_) || undef;
810
811 my $opt_dot_file_ext = "";
812 if (defined $file_ext) {
813 if ($file_ext !~ m/\./) {
814 # no dot, so needs one added in at start
815 $opt_dot_file_ext = ".$file_ext"
816 }
817 else {
818 # allow for "extensions" such as _metadata.txt to be handled
819 # gracefully
820 $opt_dot_file_ext = $file_ext;
821 }
822 }
823
824 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
825 &mk_all_dir ($tmpdir) unless -e $tmpdir;
826
827 my $count = 1000;
828 my $rand = int(rand $count);
829 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
830
831 while (-e $full_tmp_filename) {
832 $rand = int(rand $count);
833 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
834 $count++;
835 }
836
837 return $full_tmp_filename;
838}
839
840sub get_timestamped_tmp_folder
841{
842
843 my $tmp_dirname;
844 if(defined $ENV{'GSDLCOLLECTDIR'}) {
845 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
846 } elsif(defined $ENV{'GSDLHOME'}) {
847 $tmp_dirname = $ENV{'GSDLHOME'};
848 } else {
849 return undef;
850 }
851
852 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
853 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
854
855 # add the timestamp into the path otherwise we can run into problems
856 # if documents have the same name
857 my $timestamp = time;
858 my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
859 $tmp_dirname = $time_tmp_dirname;
860 my $i = 1;
861 while (-e $tmp_dirname) {
862 $tmp_dirname = "$time_tmp_dirname$i";
863 $i++;
864 }
865 &util::mk_dir($tmp_dirname);
866
867 return $tmp_dirname;
868}
869
870sub get_timestamped_tmp_filename_in_collection
871{
872
873 my ($input_filename, $output_ext) = @_;
874 # derive tmp filename from input filename
875 my ($tailname, $dirname, $suffix)
876 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
877
878 # softlink to collection tmp dir
879 my $tmp_dirname = &util::get_timestamped_tmp_folder();
880 $tmp_dirname = $dirname unless defined $tmp_dirname;
881
882 # following two steps copied from ConvertBinaryFile
883 # do we need them?? can't use them as is, as they use plugin methods.
884
885 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
886
887 # URLEncode this since htmls with images where the html filename is utf8 don't seem
888 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
889 # files on the filesystem.
890 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
891 if (defined $output_ext) {
892 $output_ext = ".$output_ext"; # add the dot
893 } else {
894 $output_ext = $suffix;
895 }
896 $output_ext= lc($output_ext);
897 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
898
899 return $tmp_filename;
900}
901
902sub get_toplevel_tmp_dir
903{
904 return filename_cat($ENV{'GSDLHOME'}, "tmp");
905}
906
907
908sub filename_to_regex {
909 my $filename = shift (@_);
910
911 # need to make single backslashes double so that regex works
912 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
913
914 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
915 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
916 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
917 $filename =~ s@\(@\\(@g; # escape brackets
918 $filename =~ s@\)@\\)@g; # escape brackets
919 $filename =~ s@\[@\\[@g; # escape brackets
920 $filename =~ s@\]@\\]@g; # escape brackets
921
922 return $filename;
923}
924
925sub unregex_filename {
926 my $filename = shift (@_);
927
928 # need to put doubled backslashes for regex back to single
929 $filename =~ s/\\\./\./g; # remove RE syntax for .
930 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
931 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
932 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
933 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
934
935 # \\ goes to \
936 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
937 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
938 return $filename;
939}
940
941sub filename_cat {
942 my $first_file = shift(@_);
943 my (@filenames) = @_;
944
945# Useful for debugging
946# -- might make sense to call caller(0) rather than (1)??
947# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
948# print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
949
950 # If first_file is not null or empty, then add it back into the list
951 if (defined $first_file && $first_file =~ /\S/) {
952 unshift(@filenames, $first_file);
953 }
954
955 my $filename = join("/", @filenames);
956
957 # remove duplicate slashes and remove the last slash
958 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
959 $filename =~ s/[\\\/]+/\\/g;
960 } else {
961 $filename =~ s/[\/]+/\//g;
962 # DB: want a filename abc\de.html to remain like this
963 }
964 $filename =~ s/[\\\/]$//;
965
966 return $filename;
967}
968
969
970sub pathname_cat {
971 my $first_path = shift(@_);
972 my (@pathnames) = @_;
973
974 # If first_path is not null or empty, then add it back into the list
975 if (defined $first_path && $first_path =~ /\S/) {
976 unshift(@pathnames, $first_path);
977 }
978
979 my $join_char;
980 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
981 $join_char = ";";
982 } else {
983 $join_char = ":";
984 }
985
986 my $pathname = join($join_char, @pathnames);
987
988 # remove duplicate slashes
989 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
990 $pathname =~ s/[\\\/]+/\\/g;
991 } else {
992 $pathname =~ s/[\/]+/\//g;
993 # DB: want a pathname abc\de.html to remain like this
994 }
995
996 return $pathname;
997}
998
999
1000sub tidy_up_oid {
1001 my ($OID) = @_;
1002 if ($OID =~ /\./) {
1003 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
1004 $OID =~ s/\.//g; #remove any periods
1005 }
1006 if ($OID =~ /^\s.*\s$/) {
1007 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
1008 # remove starting and trailing whitespace
1009 $OID =~ s/^\s+//;
1010 $OID =~ s/\s+$//;
1011 }
1012 if ($OID =~ /^[\d]*$/) {
1013 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
1014 $OID = "D" . $OID;
1015 }
1016
1017 return $OID;
1018}
1019
1020sub envvar_prepend {
1021 my ($var,$val) = @_;
1022
1023 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
1024 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
1025
1026 # do not prepend any value/path that's already in the environment variable
1027
1028 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1029 if (!defined($ENV{$var})) {
1030 $ENV{$var} = "$val";
1031 }
1032 elsif($ENV{$var} !~ m/$escaped_val/) {
1033 $ENV{$var} = "$val".$pathsep.$ENV{$var};
1034 }
1035}
1036
1037sub envvar_append {
1038 my ($var,$val) = @_;
1039
1040 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
1041 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
1042
1043 # do not append any value/path that's already in the environment variable
1044
1045 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1046 if (!defined($ENV{$var})) {
1047 $ENV{$var} = "$val";
1048 }
1049 elsif($ENV{$var} !~ m/$escaped_val/) {
1050 $ENV{$var} = $ENV{$var}.$pathsep."$val";
1051 }
1052}
1053
1054
1055# splits a filename into a prefix and a tail extension using the tail_re, or
1056# if that fails, splits on the file_extension . (dot)
1057sub get_prefix_and_tail_by_regex {
1058
1059 my ($filename,$tail_re) = @_;
1060
1061 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
1062 if ((!defined $file_prefix) || (!defined $file_ext)) {
1063 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
1064 }
1065
1066 return ($file_prefix,$file_ext);
1067}
1068
1069# get full path and file only path from a base_dir (which may be empty) and
1070# file (which may contain directories)
1071sub get_full_filenames {
1072 my ($base_dir, $file) = @_;
1073
1074 my $filename_full_path = $file;
1075 # add on directory if present
1076 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
1077
1078 my $filename_no_path = $file;
1079
1080 # remove directory if present
1081 $filename_no_path =~ s/^.*[\/\\]//;
1082 return ($filename_full_path, $filename_no_path);
1083}
1084
1085# returns the path of a file without the filename -- ie. the directory the file is in
1086sub filename_head {
1087 my $filename = shift(@_);
1088
1089 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1090 $filename =~ s/[^\\\\]*$//;
1091 }
1092 else {
1093 $filename =~ s/[^\\\/]*$//;
1094 }
1095
1096 return $filename;
1097}
1098
1099
1100
1101# returns 1 if filename1 and filename2 point to the same
1102# file or directory
1103sub filenames_equal {
1104 my ($filename1, $filename2) = @_;
1105
1106 # use filename_cat to clean up trailing slashes and
1107 # multiple slashes
1108 $filename1 = filename_cat ($filename1);
1109 $filename2 = filename_cat ($filename2);
1110
1111 # filenames not case sensitive on windows
1112 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1113 $filename1 =~ tr/[A-Z]/[a-z]/;
1114 $filename2 =~ tr/[A-Z]/[a-z]/;
1115 }
1116 return 1 if $filename1 eq $filename2;
1117 return 0;
1118}
1119
1120# If filename is relative to within_dir, returns the relative path of filename to that directory
1121# with slashes in the filename returned as they were in the original (absolute) filename.
1122sub filename_within_directory
1123{
1124 my ($filename,$within_dir) = @_;
1125
1126 if ($within_dir !~ m/[\/\\]$/) {
1127 my $dirsep = &util::get_dirsep();
1128 $within_dir .= $dirsep;
1129 }
1130
1131 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
1132 if ($filename =~ m/^$within_dir(.*)$/) {
1133 $filename = $1;
1134 }
1135
1136 return $filename;
1137}
1138
1139# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
1140# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
1141# The subpath returned will also be a URL type filename.
1142sub filename_within_directory_url_format
1143{
1144 my ($filename,$within_dir) = @_;
1145
1146 # convert parameters only to / slashes if Windows
1147
1148 my $filename_urlformat = &filepath_to_url_format($filename);
1149 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
1150
1151 #if ($within_dir_urlformat !~ m/\/$/) {
1152 # make sure directory ends with a slash
1153 #$within_dir_urlformat .= "/";
1154 #}
1155
1156 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
1157
1158 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
1159
1160 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
1161 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
1162 $filename_urlformat = $1;
1163 }
1164
1165 return $filename_urlformat;
1166}
1167
1168# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
1169# since on Linux it doesn't represent a file separator but an escape char).
1170sub filepath_to_url_format
1171{
1172 my ($filepath) = @_;
1173 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1174 # Only need to worry about Windows, as Unix style directories already in url-format
1175 # Convert Windows style \ => /
1176 $filepath =~ s@\\@/@g;
1177 }
1178 return $filepath;
1179}
1180
1181# regex filepaths on windows may include \\ as path separator. Convert \\ to /
1182sub filepath_regex_to_url_format
1183{
1184 my ($filepath) = @_;
1185 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1186 # Only need to worry about Windows, as Unix style directories already in url-format
1187 # Convert Windows style \\ => /
1188 $filepath =~ s@\\\\@/@g;
1189 }
1190 return $filepath;
1191
1192}
1193
1194# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
1195# and ignores trailing /
1196# returns (file, dirs) dirs will be empty if no subdirs
1197sub url_fileparse
1198{
1199 my ($filepath) = @_;
1200 # remove trailing /
1201 $filepath =~ s@/$@@;
1202 if ($filepath !~ m@/@) {
1203 return ($filepath, "");
1204 }
1205 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
1206 return ($file, $dirs);
1207
1208}
1209
1210
1211sub filename_within_collection
1212{
1213 my ($filename) = @_;
1214
1215 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1216
1217 if (defined $collect_dir) {
1218
1219 # if from within GSDLCOLLECTDIR, then remove directory prefix
1220 # so source_filename is realative to it. This is done to aid
1221 # portability, i.e. the collection can be moved to somewhere
1222 # else on the file system and the archives directory will still
1223 # work. This is needed, for example in the applet version of
1224 # GLI where GSDLHOME/collect on the server will be different to
1225 # the collect directory of the remove user. Of course,
1226 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1227 # it back into a full pathname.
1228
1229 $filename = filename_within_directory($filename,$collect_dir);
1230 }
1231
1232 return $filename;
1233}
1234
1235sub prettyprint_file
1236{
1237 my ($base_dir,$file,$gli) = @_;
1238
1239 my $filename_full_path = &util::filename_cat($base_dir,$file);
1240
1241 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1242 require Win32;
1243
1244 # For some reason base_dir in the form c:/a/b/c
1245 # This leads to confusion later on, so turn it back into
1246 # the more usual Windows form
1247 $base_dir =~ s/\//\\/g;
1248 my $long_base_dir = Win32::GetLongPathName($base_dir);
1249 my $long_full_path = Win32::GetLongPathName($filename_full_path);
1250
1251 $file = filename_within_directory($long_full_path,$long_base_dir);
1252 $file = encode("utf8",$file) if ($gli);
1253 }
1254
1255 return $file;
1256}
1257
1258
1259sub upgrade_if_dos_filename
1260{
1261 my ($filename_full_path,$and_encode) = @_;
1262
1263 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1264 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1265 # to its long (Windows) version
1266 my $long_filename = Win32::GetLongPathName($filename_full_path);
1267 if (defined $long_filename) {
1268 $filename_full_path = $long_filename;
1269 }
1270 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1271 $filename_full_path =~ s/^(.):/\u$1:/;
1272 if ((defined $and_encode) && ($and_encode)) {
1273 $filename_full_path = encode("utf8",$filename_full_path);
1274 }
1275 }
1276
1277 return $filename_full_path;
1278}
1279
1280
1281sub downgrade_if_dos_filename
1282{
1283 my ($filename_full_path) = @_;
1284
1285 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1286 require Win32;
1287
1288 # Ensure the given long Windows filename is in a form that can
1289 # be opened by Perl => convert it to a short DOS-like filename
1290
1291 my $short_filename = Win32::GetShortPathName($filename_full_path);
1292 if (defined $short_filename) {
1293 $filename_full_path = $short_filename;
1294 }
1295 # Make sure initial drive letter is lower-case (to fit in
1296 # with rest of Greenstone)
1297 $filename_full_path =~ s/^(.):/\u$1:/;
1298 }
1299
1300 return $filename_full_path;
1301}
1302
1303sub block_filename
1304{
1305 my ($block_hash,$filename) = @_;
1306
1307 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1308
1309 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1310 my $lower_filename = lc($filename);
1311 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1312# my $lower_drive = $filename;
1313# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1314
1315# my $upper_drive = $filename;
1316# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1317#
1318# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1319# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
1320 }
1321 else {
1322 $block_hash->{'file_blocks'}->{$filename} = 1;
1323 }
1324}
1325
1326
1327sub filename_is_absolute
1328{
1329 my ($filename) = @_;
1330
1331 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1332 return ($filename =~ m/^(\w:)?\\/);
1333 }
1334 else {
1335 return ($filename =~ m/^\//);
1336 }
1337}
1338
1339
1340## @method make_absolute()
1341#
1342# Ensure the given file path is absolute in respect to the given base path.
1343#
1344# @param $base_dir A string denoting the base path the given dir must be
1345# absolute to.
1346# @param $dir The directory to be made absolute as a string. Note that the
1347# dir may already be absolute, in which case it will remain
1348# unchanged.
1349# @return The now absolute form of the directory as a string.
1350#
1351# @author John Thompson, DL Consulting Ltd.
1352# @copy 2006 DL Consulting Ltd.
1353#
1354#used in buildcol.pl, doesn't work for all cases --kjdon
1355sub make_absolute {
1356
1357 my ($base_dir, $dir) = @_;
1358### print STDERR "dir = $dir\n";
1359 $dir =~ s/[\\\/]+/\//g;
1360 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1361 $dir =~ s|^/tmp_mnt||;
1362 1 while($dir =~ s|/[^/]*/\.\./|/|g);
1363 $dir =~ s|/[.][.]?/|/|g;
1364 $dir =~ tr|/|/|s;
1365### print STDERR "dir = $dir\n";
1366
1367 return $dir;
1368}
1369## make_absolute() ##
1370
1371sub get_dirsep {
1372
1373 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1374 return "\\";
1375 } else {
1376 return "\/";
1377 }
1378}
1379
1380sub get_os_dirsep {
1381
1382 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1383 return "\\\\";
1384 } else {
1385 return "\\\/";
1386 }
1387}
1388
1389sub get_re_dirsep {
1390
1391 return "\\\\|\\\/";
1392}
1393
1394
1395sub get_dirsep_tail {
1396 my ($filename) = @_;
1397
1398 # returns last part of directory or filename
1399 # On unix e.g. a/b.d => b.d
1400 # a/b/c => c
1401
1402 my $dirsep = get_re_dirsep();
1403 my @dirs = split (/$dirsep/, $filename);
1404 my $tail = pop @dirs;
1405
1406 # - caused problems under windows
1407 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1408
1409 return $tail;
1410}
1411
1412
1413# if this is running on windows we want binaries to end in
1414# .exe, otherwise they don't have to end in any extension
1415sub get_os_exe {
1416 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1417 return "";
1418}
1419
1420
1421# test to see whether this is a big or little endian machine
1422sub is_little_endian
1423{
1424 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1425 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1426 # Otherwise, it's little endian
1427
1428 #return 0 if $^O =~ /^darwin$/i;
1429 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1430
1431 # Going back to stating exactly whether the machine is little endian
1432 # or big endian, without any special case for Macs. Since for rata it comes
1433 # back with little endian and for shuttle with bigendian.
1434 return (ord(substr(pack("s",1), 0, 1)) == 1);
1435}
1436
1437
1438# will return the collection name if successful, "" otherwise
1439sub use_collection {
1440 my ($collection, $collectdir) = @_;
1441
1442 if (!defined $collectdir || $collectdir eq "") {
1443 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1444 }
1445
1446 # get and check the collection
1447 if (!defined($collection) || $collection eq "") {
1448 if (defined $ENV{'GSDLCOLLECTION'}) {
1449 $collection = $ENV{'GSDLCOLLECTION'};
1450 } else {
1451 print STDOUT "No collection specified\n";
1452 return "";
1453 }
1454 }
1455
1456 if ($collection eq "modelcol") {
1457 print STDOUT "You can't use modelcol.\n";
1458 return "";
1459 }
1460
1461 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1462 # are defined
1463 $ENV{'GSDLCOLLECTION'} = $collection;
1464 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1465
1466 # make sure this collection exists
1467 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1468 print STDOUT "Invalid collection ($collection).\n";
1469 return "";
1470 }
1471
1472 # everything is ready to go
1473 return $collection;
1474}
1475
1476sub get_current_collection_name {
1477 return $ENV{'GSDLCOLLECTION'};
1478}
1479
1480
1481# will return the collection name if successful, "" otherwise.
1482# Like use_collection (above) but for greenstone 3 (taking account of site level)
1483
1484sub use_site_collection {
1485 my ($site, $collection, $collectdir) = @_;
1486
1487 if (!defined $collectdir || $collectdir eq "") {
1488 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1489 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1490 }
1491
1492 # collectdir explicitly set by this point (using $site variable if required).
1493 # Can call "old" gsdl2 use_collection now.
1494
1495 return use_collection($collection,$collectdir);
1496}
1497
1498
1499
1500sub locate_config_file
1501{
1502 my ($file) = @_;
1503
1504 my $locations = locate_config_files($file);
1505
1506 return shift @$locations; # returns undef if 'locations' is empty
1507}
1508
1509
1510sub locate_config_files
1511{
1512 my ($file) = @_;
1513
1514 my @locations = ();
1515
1516 if (-e $file) {
1517 # Clearly specified (most likely full filename)
1518 # No need to hunt in 'etc' directories, return value unchanged
1519 push(@locations,$file);
1520 }
1521 else {
1522 # Check for collection specific one before looking in global GSDL 'etc'
1523 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1524 my $test_collect_etc_filename
1525 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1526
1527 if (-e $test_collect_etc_filename) {
1528 push(@locations,$test_collect_etc_filename);
1529 }
1530 }
1531 my $test_main_etc_filename
1532 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1533 if (-e $test_main_etc_filename) {
1534 push(@locations,$test_main_etc_filename);
1535 }
1536 }
1537
1538 return \@locations;
1539}
1540
1541
1542sub hyperlink_text
1543{
1544 my ($text) = @_;
1545
1546 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1547 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1548
1549 return $text;
1550}
1551
1552
1553# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1554# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1555sub is_dir_empty
1556{
1557 my ($path) = @_;
1558 opendir DIR, $path;
1559 while(my $entry = readdir DIR) {
1560 next if($entry =~ /^\.\.?$/);
1561 closedir DIR;
1562 return 0;
1563 }
1564 closedir DIR;
1565 return 1;
1566}
1567
1568# Returns the given filename converted using either URL encoding or base64
1569# encoding, as specified by $rename_method. If the given filename has no suffix
1570# (if it is just the tailname), then $no_suffix should be some defined value.
1571# rename_method can be url, none, base64
1572sub rename_file {
1573 my ($filename, $rename_method, $no_suffix) = @_;
1574
1575 if(!$filename) { # undefined or empty string
1576 return $filename;
1577 }
1578
1579 if (!$rename_method) {
1580 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1581 # Debugging information
1582 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1583 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1584 $rename_method = "url";
1585 } elsif($rename_method eq "none") {
1586 return $filename; # would have already been renamed
1587 }
1588
1589 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1590 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1591 ###$filename =~ s/ /_/g;
1592
1593 my ($tailname,$dirname,$suffix);
1594 if($no_suffix) { # given a tailname, no suffix
1595 ($tailname,$dirname) = File::Basename::fileparse($filename);
1596 }
1597 else {
1598 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1599 }
1600 if (!$suffix) {
1601 $suffix = "";
1602 }
1603 else {
1604 $suffix = lc($suffix);
1605 }
1606
1607 if ($rename_method eq "url") {
1608 $tailname = &unicode::url_encode($tailname);
1609 }
1610 elsif ($rename_method eq "base64") {
1611 $tailname = &unicode::base64_encode($tailname);
1612 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1613 }
1614
1615 $filename = "$tailname$suffix";
1616 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1617
1618 return $filename;
1619}
1620
1621
1622# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1623sub rename_ldb_or_bdb_file {
1624 my ($filename_no_ext) = @_;
1625
1626 my $new_filename = "$filename_no_ext.gdb";
1627 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1628 # try ldb
1629 my $old_filename = "$filename_no_ext.ldb";
1630
1631 if (-f $old_filename) {
1632 print STDERR "Renaming $old_filename to $new_filename\n";
1633 rename ($old_filename, $new_filename)
1634 || print STDERR "Rename failed: $!\n";
1635 return;
1636 }
1637 # try bdb
1638 $old_filename = "$filename_no_ext.bdb";
1639 if (-f $old_filename) {
1640 print STDERR "Renaming $old_filename to $new_filename\n";
1641 rename ($old_filename, $new_filename)
1642 || print STDERR "Rename failed: $!\n";
1643 return;
1644 }
1645}
1646
1647sub os_dir() {
1648
1649 my $gsdlarch = "";
1650 if(defined $ENV{'GSDLARCH'}) {
1651 $gsdlarch = $ENV{'GSDLARCH'};
1652 }
1653 return $ENV{'GSDLOS'}.$gsdlarch;
1654}
1655
1656# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1657# By default, /greenstone3 for GS3 or /greenstone for GS2.
1658sub get_greenstone_url_prefix() {
1659 # if already set on a previous occasion, just return that
1660 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1661 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1662
1663 my ($configfile, $urlprefix, $defaultUrlprefix);
1664 my @propertynames = ();
1665
1666 if($ENV{'GSDL3SRCHOME'}) {
1667 $defaultUrlprefix = "/greenstone3";
1668 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1669 push(@propertynames, qw/path\s*\=/);
1670 } else {
1671 $defaultUrlprefix = "/greenstone";
1672 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
1673 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1674 }
1675
1676 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1677
1678 if(!$urlprefix) { # no values found for URL prefix, use default values
1679 $urlprefix = $defaultUrlprefix;
1680 } else {
1681 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1682 $urlprefix =~ s/^\///; # remove the starting slash
1683 my @dirs = split(/(\\|\/)/, $urlprefix);
1684 $urlprefix = shift(@dirs);
1685
1686 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1687 $urlprefix = "/$urlprefix";
1688 }
1689 }
1690
1691 # set for the future
1692 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1693# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1694 return $urlprefix;
1695}
1696
1697
1698# Given a config file (xml or java properties file) and a list/array of regular expressions
1699# that represent property names to match on, this function will return the value for the 1st
1700# matching property name. If the return value is undefined, no matching property was found.
1701sub extract_propvalue_from_file() {
1702 my ($configfile, $propertynames) = @_;
1703
1704 my $value;
1705 unless(open(FIN, "<$configfile")) {
1706 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1707 return $value; # not initialised
1708 }
1709
1710 # Read the entire file at once, as one single line, then close it
1711 my $filecontents;
1712 {
1713 local $/ = undef;
1714 $filecontents = <FIN>;
1715 }
1716 close(FIN);
1717
1718 foreach my $regex (@$propertynames) {
1719 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1720 if($value) {
1721 $value =~ s/^\"//; # remove any startquotes
1722 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1723 last; # found value for a matching property, break from loop
1724 }
1725 }
1726
1727 return $value;
1728}
1729
1730# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1731# given that perllib is in @INC in order to invoke this subroutine.
1732# Call as follows -- after setting up INC to include perllib and
1733# after setting up GSDLHOME and GSDLOS:
1734#
1735# require util;
1736# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1737#
1738sub setup_greenstone_env() {
1739 my ($GSDLHOME, $GSDLOS) = @_;
1740
1741 #my %env_map = ();
1742 # Get the localised ENV settings of running a localised source setup.bash
1743 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1744 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1745 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
1746 if($GSDLOS =~ m/windows/i) {
1747 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1748 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1749 }
1750 if (!open(PIN, "$perl_command |")) {
1751 print STDERR ("Unable to execute command: $perl_command. $!\n");
1752 }
1753
1754 while (defined (my $perl_output_line = <PIN>)) {
1755 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1756 #$env_map{$key}=$value;
1757 $ENV{$key}=$value;
1758 }
1759 close (PIN);
1760
1761 # If any keys in $ENV don't occur in Greenstone's localised env
1762 # (stored in $env_map), delete those entries from $ENV
1763 #foreach $key (keys %ENV) {
1764 # if(!defined $env_map{$key}) {
1765 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1766 # delete $ENV{$key}; # del $ENV(key, value) pair
1767 # }
1768 #}
1769 #undef %env_map;
1770}
1771
1772sub get_perl_exec() {
1773 my $perl_exec = $^X; # may return just "perl"
1774
1775 if($ENV{'PERLPATH'}) {
1776 # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1777 if($ENV{'GSDLOS'} =~ m/windows/) {
1778 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1779 } else {
1780 $perl_exec = "$ENV{'PERLPATH'}/perl";
1781 }
1782 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1783 # containing the full path to the current perl executable we're using
1784 $perl_exec = $Config{perlpath}; # configured path for perl
1785 if (!-e $perl_exec) { # may not point to location on this machine
1786 $perl_exec = $^X; # may return just "perl"
1787 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1788 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1789 }
1790 }
1791 }
1792
1793 return $perl_exec;
1794}
1795
1796# returns the path to the java command in the JRE included with GS (if any),
1797# quoted to safeguard any spaces in this path, otherwise a simple java
1798# command is returned which assumes and will try for a system java.
1799sub get_java_command {
1800 my $java = "java";
1801 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1802 # after running setup.bat or from GLI which also runs setup.bat
1803 my $java_bin = &util::filename_cat($ENV{'GSDLHOME'},"packages","jre","bin");
1804 if(-d $java_bin) {
1805 $java = &util::filename_cat($java_bin,"java");
1806 $java = "\"".$java."\""; # quoted to preserve spaces in path
1807 }
1808 }
1809 return $java;
1810}
1811
1812
1813# Given the qualified collection name (colgroup/collection),
1814# returns the collection and colgroup parts
1815sub get_collection_parts {
1816 # http://perldoc.perl.org/File/Basename.html
1817 # my($filename, $directories, $suffix) = fileparse($path);
1818 # "$directories contains everything up to and including the last directory separator in the $path
1819 # including the volume (if applicable). The remainder of the $path is the $filename."
1820 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);
1821
1822 my $qualified_collection = shift(@_);
1823
1824 # Since activate.pl can be launched from the command-line, including by a user,
1825 # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1826 # Also allow for the accidental inclusion of multiple slashes
1827 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1828
1829 if(!defined $collection) {
1830 $collection = $colgroup;
1831 $colgroup = "";
1832 }
1833 return ($collection, $colgroup);
1834}
1835
1836# work out the "collectdir/collection" location
1837sub resolve_collection_dir {
1838 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1839
1840 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);
1841
1842 if (defined $collect_dir) {
1843 return &util::filename_cat($collect_dir,$colgroup, $collection);
1844 }
1845 elsif (defined($ENV{'GSDLCOLLECTDIR'})) {
1846 return $ENV{'GSDLCOLLECTDIR'};
1847 }
1848 else {
1849 if (defined $site) {
1850 return &util::filename_cat($ENV{'GSDL3HOME'},"sites",$site,"collect",$colgroup, $collection);
1851 }
1852 else {
1853 return &util::filename_cat($ENV{'GSDLHOME'},"collect",$colgroup, $collection);
1854 }
1855 }
1856}
1857
1858# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
1859# a directory containing sequentially numbered images.
1860sub create_itemfile
1861{
1862 my ($output_dir, $convert_basename, $convert_to) = @_;
1863 opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";
1864
1865 my $page_num = "";
1866 my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
1867
1868 # Sort files in the directory by page_num
1869 sub page_number {
1870 my ($dir) = @_;
1871 my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
1872
1873 $pagenum = 1 unless defined $pagenum;
1874 return $pagenum;
1875 }
1876
1877 # sort the files in the directory in the order of page_num rather than lexically.
1878 @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
1879
1880 # work out if the numbering of the now sorted image files starts at 0 or not
1881 # by checking the number of the first _image_ file (skipping item files)
1882 my $starts_at_0 = 0;
1883 my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
1884 if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
1885 $starts_at_0 = 1;
1886 }
1887
1888 my $item_file = &util::filename_cat($output_dir, $convert_basename.".item");
1889 open(FILE,">$item_file");
1890 print FILE "<PagedDocument>\n";
1891
1892 foreach my $file (@dir_files){
1893 if ($file !~ /\.item/i){
1894 $page_num = page_number($file);
1895 $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
1896 print FILE " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
1897 }
1898 }
1899
1900 print FILE "</PagedDocument>\n";
1901 close FILE;
1902 closedir DIR;
1903 return $item_file;
1904}
1905
19061;
Note: See TracBrowser for help on using the repository browser.