source: main/trunk/greenstone2/perllib/util.pm@ 31862

Last change on this file since 31862 was 31862, checked in by ak19, 7 years ago

Adding a new subroutine print_env() that may be useful for debugging env vars. It will print the values of all environment variables to STDERR if no specific ones are requested. If specific env vars are requested, it will just print their values to STDERR.

  • Property svn:keywords set to Author Date Id Revision
File size: 62.6 KB
RevLine 
[537]1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package util;
27
[23362]28use strict;
[27509]29use FileUtils;
[23362]30
31use Encode;
[29807]32use Unicode::Normalize 'normalize';
33
[4]34use File::Copy;
[619]35use File::Basename;
[24362]36# Config for getting the perlpath in the recommended way, though it uses paths that are
37# hard-coded into the Config file that's generated upon configuring and compiling perl.
38# $^X works better in some cases to return the path to perl used to launch the script,
39# but if launched with plain "perl" (no full-path), that will be just what it returns.
[27303]40use Config;
41# New module for file related utility functions - intended as a
42# placeholder for an extension that allows a variety of different
43# filesystems (FTP, HTTP, SAMBA, WEBDav, HDFS etc)
44use FileUtils;
[4]45
[28226]46if ($ENV{'GSDLOS'} =~ /^windows$/i) {
[28225]47 require Win32; # for working out Windows Long Filenames from Win 8.3 short filenames
48}
49
[4]50# removes files (but not directories)
51sub rm {
[27303]52 warnings::warnif("deprecated", "util::rm() is deprecated, using FileUtils::removeFiles() instead");
53 return &FileUtils::removeFiles(@_);
[4]54}
55
56# recursive removal
[10211]57sub filtered_rm_r {
[27303]58 warnings::warnif("deprecated", "util::filtered_rm_r() is deprecated, using FileUtils::removeFilesFiltered() instead");
59 return &FileUtils::removeFilesFiltered(@_);
[4]60}
61
[10211]62# recursive removal
63sub rm_r {
[27326]64 warnings::warnif("deprecated", "util::rm_r() is deprecated, using FileUtils::removeFilesRecursive() instead");
[27303]65 return &FileUtils::removeFilesRecursive(@_);
[10211]66}
67
[721]68# moves a file or a group of files
69sub mv {
[27303]70 warnings::warnif("deprecated", "util::mv() is deprecated, using FileUtils::moveFiles() instead");
71 return &FileUtils::moveFiles(@_);
[721]72}
73
[25554]74# Move the contents of source directory into target directory
75# (as opposed to merely replacing target dir with the src dir)
76# This can overwrite any files with duplicate names in the target
77# but other files and folders in the target will continue to exist
78sub mv_dir_contents {
[27303]79 warnings::warnif("deprecated", "util::mv_dir_contents() is deprecated, using FileUtils::moveDirectoryContents() instead");
80 return &FileUtils::moveDirectoryContents(@_);
[25554]81}
[721]82
[4]83# copies a file or a group of files
84sub cp {
[27303]85 warnings::warnif("deprecated", "util::cp() is deprecated, using FileUtils::copyFiles() instead");
86 return &FileUtils::copyFiles(@_);
[4]87}
88
89# recursively copies a file or group of files
[1454]90# syntax: cp_r (sourcefiles, destination directory)
91# destination must be a directory - to copy one file to
92# another use cp instead
[4]93sub cp_r {
[27303]94 warnings::warnif("deprecated", "util::cp_r() is deprecated, using FileUtils::copyFilesrecursive() instead");
95 return &FileUtils::copyFilesRecursive(@_);
96}
[4]97
[21762]98# recursively copies a file or group of files
99# syntax: cp_r (sourcefiles, destination directory)
100# destination must be a directory - to copy one file to
101# another use cp instead
102sub cp_r_nosvn {
[27303]103 warnings::warnif("deprecated", "util::cp_r_nosvn() is deprecated, using FileUtils::copyFilesRecursiveNoSVN() instead");
104 return &FileUtils::copyFilesRecursiveNoSVN(@_);
[21762]105}
106
[11179]107# copies a directory and its contents, excluding subdirectories, into a new directory
108sub cp_r_toplevel {
[27303]109 warnings::warnif("deprecated", "util::cp_r_toplevel() is deprecated, using FileUtils::recursiveCopyTopLevel() instead");
110 return &FileUtils::recursiveCopyTopLevel(@_);
[11179]111}
112
[721]113sub mk_dir {
[27303]114 warnings::warnif("deprecated", "util::mk_dir() is deprecated, using FileUtils::makeDirectory() instead");
115 return &FileUtils::makeDirectory(@_);
[721]116}
117
[1046]118# in case anyone cares - I did some testing (using perls Benchmark module)
119# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
120# slightly faster (surprisingly) - Stefan.
[4]121sub mk_all_dir {
[27303]122 warnings::warnif("deprecated", "util::mk_all_dir() is deprecated, using FileUtils::makeAllDirectories() instead");
123 return &FileUtils::makeAllDirectories(@_);
[4]124}
125
[619]126# make hard link to file if supported by OS, otherwise copy the file
127sub hard_link {
[27303]128 warnings::warnif("deprecated", "util::hard_link() is deprecated, using FileUtils::hardLink() instead");
129 return &FileUtils::hardLink(@_);
[619]130}
131
[2193]132# make soft link to file if supported by OS, otherwise copy file
[721]133sub soft_link {
[27303]134 warnings::warnif("deprecated", "util::soft_link() is deprecated, using FileUtils::softLink() instead");
135 return &FileUtils::softLink(@_);
[721]136}
137
[23362]138# Primarily for filenames generated by processing
139# content of HTML files (which are mapped to UTF-8 internally)
140#
141# To turn this into an octet string that really exists on the file
142# system:
143# 1. don't need to do anything special for Unix-based systems
144# (as underlying file system is byte-code)
145# 2. need to map to short DOS filenames for Windows
[721]146
[23362]147sub utf8_to_real_filename
148{
149 my ($utf8_filename) = @_;
[721]150
[23362]151 my $real_filename;
[721]152
[28375]153 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]154 require Win32;
[23388]155
[23362]156 my $unicode_filename = decode("utf8",$utf8_filename);
157 $real_filename = Win32::GetShortPathName($unicode_filename);
158 }
159 else {
160 $real_filename = $utf8_filename;
161 }
162
163 return $real_filename;
164}
165
[29794]166sub raw_filename_to_unicode
167{
[29810]168 my ($directory, $raw_file, $filename_encoding ) = @_;
[29794]169
[29810]170 my $unicode_filename = $raw_file;
171 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[29816]172 # Try turning a short version to the long version
173 # If there are "funny" characters in the file name, that can't be represented in the ANSI code, then we will have a short weird version, eg E74~1.txt
174 $unicode_filename = &util::get_dirsep_tail(&util::upgrade_if_dos_filename(&FileUtils::filenameConcatenate($directory, $raw_file), 0));
175
176
177 if ($unicode_filename eq $raw_file) {
178 # This means the original filename *was* able to be encoded in the local ANSI file encoding (eg windows_1252), so now we turn it back to perl's unicode
[29810]179
[29816]180 $unicode_filename = &Encode::decode(locale_fs => $unicode_filename);
181 }
182 # else This means we did have one of the funny filenames. the getLongPathName (used in upgrade_if_dos_filename) will return unicode, so we don't need to do anything more.
183
[29810]184
185 } else {
[29816]186 # we had a utf-8 string, turn it into perl internal unicode
187 $unicode_filename = &Encode::decode("utf-8", $unicode_filename);
[29810]188
189
[29816]190 }
191 #Does the filename have url encoded chars in it?
192 if (&unicode::is_url_encoded($unicode_filename)) {
193 $unicode_filename = &unicode::url_decode($unicode_filename);
194 }
195
[29794]196 # Normalise the filename to canonical composition - on mac, filenames use decopmposed form for accented chars
197 if ($ENV{'GSDLOS'} =~ m/^darwin$/i) {
[29816]198 $unicode_filename = normalize('C', $unicode_filename); # Composed form 'C'
199 }
[29794]200 return $unicode_filename;
201
202}
[27303]203sub fd_exists {
204 warnings::warnif("deprecated", "util::fd_exists() is deprecated, using FileUtils::fileTest() instead");
205 return &FileUtils::fileTest(@_);
[23362]206}
207
[27303]208sub file_exists {
209 warnings::warnif("deprecated", "util::file_exists() is deprecated, using FileUtils::fileExists() instead");
210 return &FileUtils::fileExists(@_);
[23362]211}
212
[27303]213sub dir_exists {
214 warnings::warnif("deprecated", "util::dir_exists() is deprecated, using FileUtils::directoryExists() instead");
215 return &FileUtils::directoryExists(@_);
[23362]216}
217
[4]218# updates a copy of a directory in some other part of the filesystem
219# verbosity settings are: 0=low, 1=normal, 2=high
220# both $fromdir and $todir should be absolute paths
221sub cachedir {
[27303]222 warnings::warnif("deprecated", "util::cachedir() is deprecated, using FileUtils::synchronizeDirectories() instead");
223 return &FileUtils::synchronizeDirectories(@_);
[4]224}
225
226# this function returns -1 if either file is not found
227# assumes that $file1 and $file2 are absolute file names or
228# in the current directory
229# $file2 is allowed to be newer than $file1
230sub differentfiles {
[27303]231 warnings::warnif("deprecated", "util::differentfiles() is deprecated, using FileUtils::differentFiles() instead");
232 return &FileUtils::differentFiles(@_);
[4]233}
234
235
[30574]236# works out the temporary directory, including in the case where Greenstone is not writable
237# In that case, gs3-setup.bat would already have set the GS_TMP_OUTPUT_DIR temp variable
238sub determine_tmp_dir
239{
240 my $try_collect_dir = shift(@_) || 0;
241
242 my $tmp_dirname;
243 if(defined $ENV{'GS_TMP_OUTPUT_DIR'}) {
244 $tmp_dirname = $ENV{'GS_TMP_OUTPUT_DIR'};
245 } elsif($try_collect_dir && defined $ENV{'GSDLCOLLECTDIR'}) {
246 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
247 } elsif(defined $ENV{'GSDLHOME'}) {
248 $tmp_dirname = $ENV{'GSDLHOME'};
249 } else {
250 return undef;
251 }
252
253 if(!defined $ENV{'GS_TMP_OUTPUT_DIR'}) {
254 # test the tmp_dirname folder is writable, by trying to write out a file
255 # Unfortunately, cound not get if(-w $dirname) to work on directories on Windows
256 ## http://alvinalexander.com/blog/post/perl/perl-file-test-operators-reference-cheat-sheet (test file/dir writable)
257 ## http://www.makelinux.net/alp/083 (real and effective user IDs)
258
259 my $tmp_test_file = &FileUtils::filenameConcatenate($tmp_dirname, "writability_test.tmp");
[30575]260 if (open (FOUT, ">$tmp_test_file")) {
[30574]261 close(FOUT);
262 &FileUtils::removeFiles($tmp_test_file);
263 } else { # location not writable, use TMP location
[31694]264 if (defined $ENV{'TMP'}) {
265 $tmp_dirname = $ENV{'TMP'};
266 } else {
267 $tmp_dirname = "/tmp";
268 }
269 $tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, "greenstone");
[30574]270 $ENV{'GS_TMP_OUTPUT_DIR'} = $tmp_dirname; # store for next time
271 }
272 }
273
274 $tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, "tmp");
275 &FileUtils::makeAllDirectories ($tmp_dirname) unless -e $tmp_dirname;
276
277 return $tmp_dirname;
278}
279
[16266]280sub get_tmp_filename
281{
282 my $file_ext = shift(@_) || undef;
283
[22438]284 my $opt_dot_file_ext = "";
285 if (defined $file_ext) {
286 if ($file_ext !~ m/\./) {
287 # no dot, so needs one added in at start
288 $opt_dot_file_ext = ".$file_ext"
289 }
290 else {
291 # allow for "extensions" such as _metadata.txt to be handled
292 # gracefully
293 $opt_dot_file_ext = $file_ext;
294 }
295 }
[16266]296
[31514]297 my $tmpdir = &util::determine_tmp_dir(0);
[4]298
299 my $count = 1000;
300 my $rand = int(rand $count);
[27303]301 my $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
[16266]302
303 while (-e $full_tmp_filename) {
[4]304 $rand = int(rand $count);
[27303]305 $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
[4]306 $count++;
307 }
[16266]308
309 return $full_tmp_filename;
[4]310}
311
[28066]312# These 2 are "static" variables used by the get_timestamped_tmp_folder() subroutine below and
313# belong with that function. They help ensure the timestamped tmp folders generated are unique.
314my $previous_timestamp = undef;
315my $previous_timestamp_f = 0; # frequency
316
[22886]317sub get_timestamped_tmp_folder
[22873]318{
[30574]319 my $tmp_dirname = &util::determine_tmp_dir(1);
[28066]320
[22873]321 # add the timestamp into the path otherwise we can run into problems
322 # if documents have the same name
[28066]323 my $timestamp = time;
324
325 if (!defined $previous_timestamp || ($timestamp > $previous_timestamp)) {
326 $previous_timestamp_f = 0;
327 $previous_timestamp = $timestamp;
328 } else {
329 $previous_timestamp_f++;
330 }
331
[27303]332 my $time_tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, $timestamp);
[28066]333 $tmp_dirname = $time_tmp_dirname;
334 my $i = $previous_timestamp_f;
335
336 if($previous_timestamp_f > 0) {
337 $tmp_dirname = $time_tmp_dirname."_".$i;
338 $i++;
339 }
[22873]340 while (-e $tmp_dirname) {
[28066]341 $tmp_dirname = $time_tmp_dirname."_".$i;
[22873]342 $i++;
343 }
[28066]344 &FileUtils::makeDirectory($tmp_dirname);
345
[22886]346 return $tmp_dirname;
347}
[22873]348
[22886]349sub get_timestamped_tmp_filename_in_collection
350{
351
352 my ($input_filename, $output_ext) = @_;
353 # derive tmp filename from input filename
354 my ($tailname, $dirname, $suffix)
355 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
356
357 # softlink to collection tmp dir
358 my $tmp_dirname = &util::get_timestamped_tmp_folder();
359 $tmp_dirname = $dirname unless defined $tmp_dirname;
360
[22873]361 # following two steps copied from ConvertBinaryFile
[22886]362 # do we need them?? can't use them as is, as they use plugin methods.
363
[22873]364 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
365
366 # URLEncode this since htmls with images where the html filename is utf8 don't seem
367 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
368 # files on the filesystem.
369 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
370 if (defined $output_ext) {
371 $output_ext = ".$output_ext"; # add the dot
372 } else {
373 $output_ext = $suffix;
374 }
375 $output_ext= lc($output_ext);
[27303]376 my $tmp_filename = &FileUtils::filenameConcatenate($tmp_dirname, "$tailname$output_ext");
[22873]377
378 return $tmp_filename;
379}
380
[21218]381sub get_toplevel_tmp_dir
382{
[27303]383 return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "tmp");
[21218]384}
385
386
[28460]387sub get_collectlevel_tmp_dir
388{
389 my $tmp_dirname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "tmp");
390 &FileUtils::makeDirectory($tmp_dirname) if (!-e $tmp_dirname);
391
392 return $tmp_dirname;
393}
394
[31513]395sub get_parent_folder
396{
397 my ($path) = @_;
398 my ($tailname, $dirname, $suffix)
399 = &File::Basename::fileparse($path, "\\.[^\\.]+\$");
400
401 return &FileUtils::sanitizePath($dirname);
402}
403
[17512]404sub filename_to_regex {
405 my $filename = shift (@_);
[4]406
[24971]407 # need to make single backslashes double so that regex works
[24832]408 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
[24829]409
[24832]410 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
411 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
[24829]412 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
413 $filename =~ s@\(@\\(@g; # escape brackets
414 $filename =~ s@\)@\\)@g; # escape brackets
[24932]415 $filename =~ s@\[@\\[@g; # escape brackets
416 $filename =~ s@\]@\\]@g; # escape brackets
[24829]417
[17512]418 return $filename;
419}
420
[24829]421sub unregex_filename {
422 my $filename = shift (@_);
423
424 # need to put doubled backslashes for regex back to single
425 $filename =~ s/\\\./\./g; # remove RE syntax for .
426 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
427 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
[24932]428 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
429 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
[24940]430
431 # \\ goes to \
432 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
433 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
[24829]434 return $filename;
435}
436
[4]437sub filename_cat {
[27303]438 # I've disabled this warning for now, as every Greenstone perl
439 # script seems to make use of this function and so you drown in a
440 # sea of deprecated warnings [jmt12]
441# warnings::warnif("deprecated", "util::filename_cat() is deprecated, using FileUtils::filenameConcatenate() instead");
442 return &FileUtils::filenameConcatenate(@_);
[4]443}
444
[21413]445
[28394]446sub _pathname_cat {
447 my $join_char = shift(@_);
448 my $first_path = shift(@_);
[21413]449 my (@pathnames) = @_;
450
451 # If first_path is not null or empty, then add it back into the list
452 if (defined $first_path && $first_path =~ /\S/) {
453 unshift(@pathnames, $first_path);
454 }
455
456 my $pathname = join($join_char, @pathnames);
457
458 # remove duplicate slashes
[28394]459 if ($join_char eq ";") {
[21413]460 $pathname =~ s/[\\\/]+/\\/g;
[28460]461 if ($^O eq "cygwin") {
462 # Once we've collapsed muliple (potentialy > 2) slashes
463 # For cygwin, actually want things double-backslahed
464 $pathname =~ s/\\/\\\\/g;
465 }
466
[21413]467 } else {
468 $pathname =~ s/[\/]+/\//g;
469 # DB: want a pathname abc\de.html to remain like this
470 }
471
472 return $pathname;
473}
474
475
[28394]476sub pathname_cat {
477 my (@pathnames) = @_;
478
479 my $join_char;
480 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
481 $join_char = ";";
482 } else {
483 $join_char = ":";
484 }
485 return _pathname_cat($join_char,@pathnames);
486}
487
488
489sub javapathname_cat {
490 my (@pathnames) = @_;
491
492 my $join_char;
493
494 # Unlike pathname_cat() above, not interested if running in a Cygwin environment
495 # This is because the java we run is actually a native Windows executable
496
497 if (($ENV{'GSDLOS'} =~ /^windows$/i)) {
498 $join_char = ";";
499 } else {
500 $join_char = ":";
501 }
502 return _pathname_cat($join_char,@pathnames);
503}
504
505
[28460]506sub makeFilenameJavaCygwinCompatible
507{
508 my ($java_filename) = @_;
[28394]509
[28460]510 if ($^O eq "cygwin") {
511 # To be used with a Java program, but under Cygwin
512 # Because the java binary that is native to Windows, need to
513 # convert the Cygwin paths (i.e. Unix style) to be Windows
514 # compatible
515
516 $java_filename = `cygpath -wp "$java_filename"`;
517 chomp($java_filename);
518 $java_filename =~ s%\\%\\\\%g;
519 }
520
521 return $java_filename;
522}
523
[19616]524sub tidy_up_oid {
525 my ($OID) = @_;
[29112]526 if ($OID =~ /[\.\/\\]/) {
527 print STDERR "Warning, identifier $OID contains periods or slashes(.\\\/), replacing them with _\n";
528 $OID =~ s/[\.\\\/]/_/g; #remove any periods
[19616]529 }
530 if ($OID =~ /^\s.*\s$/) {
531 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
532 # remove starting and trailing whitespace
533 $OID =~ s/^\s+//;
534 $OID =~ s/\s+$//;
535 }
536 if ($OID =~ /^[\d]*$/) {
537 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
538 $OID = "D" . $OID;
539 }
540
541 return $OID;
542}
[26206]543
[10212]544sub envvar_prepend {
545 my ($var,$val) = @_;
546
[26206]547 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
[28375]548## my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
[26206]549
[28375]550 # Rewritten above to make ":" the default (Windows is the special
551 # case, anything else 'unusual' such as Solaris etc is Unix)
552 my $pathsep = (defined $ENV{'GSDLOS'} && (($ENV{'GSDLOS'} =~ m/windows/) && ($^O ne "cygwin"))) ? ";" : ":";
553
[16404]554 # do not prepend any value/path that's already in the environment variable
[24832]555
556 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
557 if (!defined($ENV{$var})) {
558 $ENV{$var} = "$val";
[16442]559 }
[24832]560 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]561 $ENV{$var} = "$val".$pathsep.$ENV{$var};
[10212]562 }
563}
564
565sub envvar_append {
566 my ($var,$val) = @_;
[26206]567
568 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
569 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
[24832]570
[16404]571 # do not append any value/path that's already in the environment variable
[26206]572
[24832]573 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
574 if (!defined($ENV{$var})) {
575 $ENV{$var} = "$val";
[16442]576 }
[24832]577 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]578 $ENV{$var} = $ENV{$var}.$pathsep."$val";
[24832]579 }
[10212]580}
581
[31862]582# debug aid
583sub print_env {
584 my @envvars = @_;
[16442]585
[31862]586 if (scalar(@envvars) == 0) {
587 print STDERR "@@@ No env vars requested\n";
588
589
590 my $output;
591 if (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/windows/) {
592 $output = `set`;
593 }
594 else {
595 $output = `env`;
596 }
597
598 print STDERR "@@@ Environment was:\n********\n$output\n*******\n";
599 } else {
600 print STDERR "@@@ Environment was:\n********\n";
601 foreach my $envvar (@envvars) {
602 if(defined $ENV{$envvar}) {
603 print STDERR "\t$envvar = ".$ENV{$envvar}."\n";
604 } else {
605 $envvar = uc($envvar);
606 if(defined $ENV{$envvar}) {
607 print STDERR "\t$envvar = ".$ENV{$envvar}."\n";
608 } else {
609 print STDERR "Env var $envvar was not set\n";
610 }
611 }
612 }
613 print STDERR "@@@*******\n";
614 }
615}
[28394]616
[31862]617
[16380]618# splits a filename into a prefix and a tail extension using the tail_re, or
619# if that fails, splits on the file_extension . (dot)
620sub get_prefix_and_tail_by_regex {
[10212]621
[16380]622 my ($filename,$tail_re) = @_;
623
624 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
625 if ((!defined $file_prefix) || (!defined $file_ext)) {
626 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
627 }
628
629 return ($file_prefix,$file_ext);
630}
631
632# get full path and file only path from a base_dir (which may be empty) and
633# file (which may contain directories)
634sub get_full_filenames {
635 my ($base_dir, $file) = @_;
636
[28375]637# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
638# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
639# print STDERR "** Calling method: $lcfilename:$cline $cpackage->$csubr\n";
640
641
[16380]642 my $filename_full_path = $file;
643 # add on directory if present
[27303]644 $filename_full_path = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\S/;
[16380]645
646 my $filename_no_path = $file;
647
648 # remove directory if present
649 $filename_no_path =~ s/^.*[\/\\]//;
650 return ($filename_full_path, $filename_no_path);
651}
652
[8682]653# returns the path of a file without the filename -- ie. the directory the file is in
654sub filename_head {
655 my $filename = shift(@_);
656
[28375]657 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[8682]658 $filename =~ s/[^\\\\]*$//;
659 }
660 else {
661 $filename =~ s/[^\\\/]*$//;
662 }
663
664 return $filename;
665}
666
667
[23362]668
[1454]669# returns 1 if filename1 and filename2 point to the same
670# file or directory
671sub filenames_equal {
672 my ($filename1, $filename2) = @_;
673
674 # use filename_cat to clean up trailing slashes and
675 # multiple slashes
[27303]676 $filename1 = &FileUtils::filenameConcatenate($filename1);
677 $filename2 = &FileUtils::filenameConcatenate($filename2);
[1454]678
679 # filenames not case sensitive on windows
680 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
681 $filename1 =~ tr/[A-Z]/[a-z]/;
682 $filename2 =~ tr/[A-Z]/[a-z]/;
683 }
684 return 1 if $filename1 eq $filename2;
685 return 0;
686}
687
[24932]688# If filename is relative to within_dir, returns the relative path of filename to that directory
689# with slashes in the filename returned as they were in the original (absolute) filename.
[23362]690sub filename_within_directory
691{
692 my ($filename,$within_dir) = @_;
693
[23371]694 if ($within_dir !~ m/[\/\\]$/) {
695 my $dirsep = &util::get_dirsep();
[23362]696 $within_dir .= $dirsep;
697 }
698
[24829]699 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
[23362]700 if ($filename =~ m/^$within_dir(.*)$/) {
701 $filename = $1;
702 }
703
704 return $filename;
705}
706
[24932]707# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
708# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
709# The subpath returned will also be a URL type filename.
710sub filename_within_directory_url_format
711{
712 my ($filename,$within_dir) = @_;
713
714 # convert parameters only to / slashes if Windows
715
[24971]716 my $filename_urlformat = &filepath_to_url_format($filename);
717 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
718
[24932]719 #if ($within_dir_urlformat !~ m/\/$/) {
720 # make sure directory ends with a slash
721 #$within_dir_urlformat .= "/";
722 #}
723
724 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
725
726 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
727
728 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
729 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
730 $filename_urlformat = $1;
731 }
732
733 return $filename_urlformat;
734}
735
[24971]736# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
737# since on Linux it doesn't represent a file separator but an escape char).
738sub filepath_to_url_format
739{
740 my ($filepath) = @_;
[28375]741 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[24971]742 # Only need to worry about Windows, as Unix style directories already in url-format
743 # Convert Windows style \ => /
744 $filepath =~ s@\\@/@g;
745 }
746 return $filepath;
747}
[24932]748
[25093]749# regex filepaths on windows may include \\ as path separator. Convert \\ to /
750sub filepath_regex_to_url_format
751{
752 my ($filepath) = @_;
[28375]753 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[25093]754 # Only need to worry about Windows, as Unix style directories already in url-format
755 # Convert Windows style \\ => /
756 $filepath =~ s@\\\\@/@g;
757 }
758 return $filepath;
759
760}
[24971]761
[25093]762# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
763# and ignores trailing /
764# returns (file, dirs) dirs will be empty if no subdirs
765sub url_fileparse
766{
767 my ($filepath) = @_;
768 # remove trailing /
769 $filepath =~ s@/$@@;
770 if ($filepath !~ m@/@) {
771 return ($filepath, "");
772 }
773 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
774 return ($file, $dirs);
775
776}
777
778
[10281]779sub filename_within_collection
780{
781 my ($filename) = @_;
782
783 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
784
785 if (defined $collect_dir) {
[23362]786
[15875]787 # if from within GSDLCOLLECTDIR, then remove directory prefix
788 # so source_filename is realative to it. This is done to aid
789 # portability, i.e. the collection can be moved to somewhere
790 # else on the file system and the archives directory will still
791 # work. This is needed, for example in the applet version of
792 # GLI where GSDLHOME/collect on the server will be different to
793 # the collect directory of the remove user. Of course,
794 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
795 # it back into a full pathname.
[23362]796
797 $filename = filename_within_directory($filename,$collect_dir);
[10281]798 }
799
800 return $filename;
801}
802
[23362]803sub prettyprint_file
804{
[23484]805 my ($base_dir,$file,$gli) = @_;
[23362]806
[27303]807 my $filename_full_path = &FileUtils::filenameConcatenate($base_dir,$file);
[23362]808
[28375]809 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]810 require Win32;
811
812 # For some reason base_dir in the form c:/a/b/c
813 # This leads to confusion later on, so turn it back into
814 # the more usual Windows form
815 $base_dir =~ s/\//\\/g;
816 my $long_base_dir = Win32::GetLongPathName($base_dir);
817 my $long_full_path = Win32::GetLongPathName($filename_full_path);
818
819 $file = filename_within_directory($long_full_path,$long_base_dir);
[23484]820 $file = encode("utf8",$file) if ($gli);
[23362]821 }
822
823 return $file;
824}
825
826
827sub upgrade_if_dos_filename
828{
[23371]829 my ($filename_full_path,$and_encode) = @_;
[23362]830
[28375]831 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]832 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
833 # to its long (Windows) version
[23416]834 my $long_filename = Win32::GetLongPathName($filename_full_path);
835 if (defined $long_filename) {
[29810]836
[23416]837 $filename_full_path = $long_filename;
838 }
[23362]839 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
[23483]840 $filename_full_path =~ s/^(.):/\u$1:/;
[29810]841
[23371]842 if ((defined $and_encode) && ($and_encode)) {
843 $filename_full_path = encode("utf8",$filename_full_path);
844 }
[23362]845 }
846
847 return $filename_full_path;
848}
849
850
[23388]851sub downgrade_if_dos_filename
852{
853 my ($filename_full_path) = @_;
854
[28375]855 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23388]856 require Win32;
857
858 # Ensure the given long Windows filename is in a form that can
859 # be opened by Perl => convert it to a short DOS-like filename
860
[23414]861 my $short_filename = Win32::GetShortPathName($filename_full_path);
862 if (defined $short_filename) {
863 $filename_full_path = $short_filename;
864 }
[23416]865 # Make sure initial drive letter is lower-case (to fit in
866 # with rest of Greenstone)
[23483]867 $filename_full_path =~ s/^(.):/\u$1:/;
[23388]868 }
869
870 return $filename_full_path;
871}
872
873
[18441]874sub filename_is_absolute
875{
[27303]876 warnings::warnif("deprecated", "util::filename_is_absolute() is deprecated, using FileUtils::isFilenameAbsolute() instead");
877 return &FileUtils::isFilenameAbsolute(@_);
[18441]878}
879
880
[17572]881## @method make_absolute()
882#
883# Ensure the given file path is absolute in respect to the given base path.
884#
885# @param $base_dir A string denoting the base path the given dir must be
886# absolute to.
887# @param $dir The directory to be made absolute as a string. Note that the
888# dir may already be absolute, in which case it will remain
889# unchanged.
890# @return The now absolute form of the directory as a string.
891#
892# @author John Thompson, DL Consulting Ltd.
893# @copy 2006 DL Consulting Ltd.
894#
895#used in buildcol.pl, doesn't work for all cases --kjdon
896sub make_absolute {
897
898 my ($base_dir, $dir) = @_;
[18441]899### print STDERR "dir = $dir\n";
[17572]900 $dir =~ s/[\\\/]+/\//g;
901 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
902 $dir =~ s|^/tmp_mnt||;
903 1 while($dir =~ s|/[^/]*/\.\./|/|g);
904 $dir =~ s|/[.][.]?/|/|g;
905 $dir =~ tr|/|/|s;
[18441]906### print STDERR "dir = $dir\n";
[17572]907
908 return $dir;
909}
910## make_absolute() ##
[10281]911
[7929]912sub get_dirsep {
913
[28375]914 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[7929]915 return "\\";
916 } else {
917 return "\/";
918 }
919}
920
[619]921sub get_os_dirsep {
[4]922
[28375]923 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[619]924 return "\\\\";
925 } else {
926 return "\\\/";
927 }
928}
929
930sub get_re_dirsep {
931
932 return "\\\\|\\\/";
933}
934
935
[15003]936sub get_dirsep_tail {
937 my ($filename) = @_;
938
939 # returns last part of directory or filename
940 # On unix e.g. a/b.d => b.d
941 # a/b/c => c
942
[15088]943 my $dirsep = get_re_dirsep();
944 my @dirs = split (/$dirsep/, $filename);
945 my $tail = pop @dirs;
[15003]946
[15088]947 # - caused problems under windows
948 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
949
[15003]950 return $tail;
951}
952
953
[4]954# if this is running on windows we want binaries to end in
955# .exe, otherwise they don't have to end in any extension
956sub get_os_exe {
[28375]957 return ".exe" if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin"));
[4]958 return "";
959}
960
961
[86]962# test to see whether this is a big or little endian machine
[15713]963sub is_little_endian
964{
965 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
966 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
967 # Otherwise, it's little endian
968
969 #return 0 if $^O =~ /^darwin$/i;
[17714]970 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
971
972 # Going back to stating exactly whether the machine is little endian
973 # or big endian, without any special case for Macs. Since for rata it comes
974 # back with little endian and for shuttle with bigendian.
[15713]975 return (ord(substr(pack("s",1), 0, 1)) == 1);
[86]976}
[4]977
[86]978
[135]979# will return the collection name if successful, "" otherwise
980sub use_collection {
[1454]981 my ($collection, $collectdir) = @_;
[135]982
[1454]983 if (!defined $collectdir || $collectdir eq "") {
[27303]984 $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect");
[1454]985 }
986
[28211]987 if (!defined $ENV{'GREENSTONEHOME'}) { # for GS3, would have been defined in use_site_collection, to GSDL3HOME
988 $ENV{'GREENSTONEHOME'} = $ENV{'GSDLHOME'};
989 }
990
[135]991 # get and check the collection
992 if (!defined($collection) || $collection eq "") {
993 if (defined $ENV{'GSDLCOLLECTION'}) {
994 $collection = $ENV{'GSDLCOLLECTION'};
995 } else {
[2359]996 print STDOUT "No collection specified\n";
[135]997 return "";
998 }
999 }
1000
1001 if ($collection eq "modelcol") {
[2359]1002 print STDOUT "You can't use modelcol.\n";
[135]1003 return "";
1004 }
1005
1006 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1007 # are defined
[17204]1008 $ENV{'GSDLCOLLECTION'} = $collection;
[28211]1009 $ENV{'GSDLCOLLECTHOME'} = $collectdir;
[27303]1010 $ENV{'GSDLCOLLECTDIR'} = &FileUtils::filenameConcatenate($collectdir, $collection);
[135]1011
1012 # make sure this collection exists
1013 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
[2359]1014 print STDOUT "Invalid collection ($collection).\n";
[135]1015 return "";
1016 }
1017
1018 # everything is ready to go
1019 return $collection;
1020}
1021
[21207]1022sub get_current_collection_name {
1023 return $ENV{'GSDLCOLLECTION'};
1024}
[14926]1025
1026
1027# will return the collection name if successful, "" otherwise.
1028# Like use_collection (above) but for greenstone 3 (taking account of site level)
1029
1030sub use_site_collection {
1031 my ($site, $collection, $collectdir) = @_;
1032
1033 if (!defined $collectdir || $collectdir eq "") {
1034 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
[27303]1035 $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'}, "sites", $site, "collect");
[14926]1036 }
1037
[28211]1038 if (defined $ENV{'GSDL3HOME'}) {
1039 $ENV{'GREENSTONEHOME'} = $ENV{'GSDL3HOME'};
1040 $ENV{'SITEHOME'} = &FileUtils::filenameConcatenate($ENV{'GREENSTONEHOME'}, "sites", $site);
1041 } elsif (defined $ENV{'GSDL3SRCHOME'}) {
1042 $ENV{'GREENSTONEHOME'} = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "web");
1043 $ENV{'SITEHOME'} = &FileUtils::filenameConcatenate($ENV{'GREENSTONEHOME'}, "sites", $site);
1044 } else {
1045 print STDERR "*** util::use_site_collection(). Warning: Neither GSDL3HOME nor GSDL3SRCHOME set.\n";
1046 }
1047
[14926]1048 # collectdir explicitly set by this point (using $site variable if required).
1049 # Can call "old" gsdl2 use_collection now.
1050
1051 return use_collection($collection,$collectdir);
1052}
1053
1054
1055
[15018]1056sub locate_config_file
1057{
1058 my ($file) = @_;
1059
1060 my $locations = locate_config_files($file);
1061
1062 return shift @$locations; # returns undef if 'locations' is empty
1063}
1064
1065
1066sub locate_config_files
1067{
1068 my ($file) = @_;
1069
1070 my @locations = ();
1071
1072 if (-e $file) {
1073 # Clearly specified (most likely full filename)
1074 # No need to hunt in 'etc' directories, return value unchanged
1075 push(@locations,$file);
1076 }
1077 else {
1078 # Check for collection specific one before looking in global GSDL 'etc'
[16969]1079 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1080 my $test_collect_etc_filename
[27303]1081 = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"etc", $file);
[16969]1082
1083 if (-e $test_collect_etc_filename) {
1084 push(@locations,$test_collect_etc_filename);
1085 }
[15018]1086 }
1087 my $test_main_etc_filename
[27303]1088 = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"etc", $file);
[15018]1089 if (-e $test_main_etc_filename) {
1090 push(@locations,$test_main_etc_filename);
1091 }
1092 }
1093
1094 return \@locations;
1095}
1096
1097
[9955]1098sub hyperlink_text
1099{
1100 my ($text) = @_;
1101
1102 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1103 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1104
1105 return $text;
1106}
1107
1108
[16436]1109# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1110# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
[27303]1111sub is_dir_empty {
1112 warnings::warnif("deprecated", "util::is_dir_empty() is deprecated, using FileUtils::isDirectoryEmpty() instead");
1113 return &FileUtils::isDirectoryEmpty(@_);
[16436]1114}
1115
[18337]1116# Returns the given filename converted using either URL encoding or base64
1117# encoding, as specified by $rename_method. If the given filename has no suffix
[20413]1118# (if it is just the tailname), then $no_suffix should be some defined value.
1119# rename_method can be url, none, base64
[18319]1120sub rename_file {
[18337]1121 my ($filename, $rename_method, $no_suffix) = @_;
[18329]1122
[18337]1123 if(!$filename) { # undefined or empty string
[18329]1124 return $filename;
1125 }
[18319]1126
[20413]1127 if (!$rename_method) {
1128 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1129 # Debugging information
[22856]1130 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1131 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
[20413]1132 $rename_method = "url";
1133 } elsif($rename_method eq "none") {
1134 return $filename; # would have already been renamed
1135 }
1136
[19762]1137 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1138 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1139 ###$filename =~ s/ /_/g;
[18337]1140
1141 my ($tailname,$dirname,$suffix);
1142 if($no_suffix) { # given a tailname, no suffix
1143 ($tailname,$dirname) = File::Basename::fileparse($filename);
1144 }
1145 else {
1146 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1147 }
[23388]1148 if (!$suffix) {
1149 $suffix = "";
1150 }
[26973]1151 # This breaks GLI matching extracted metadata to files in Enrich panel, as
1152 # original is eg .JPG while gsdlsourcefilename ends up .jpg
1153 # Not sure why it was done in first place...
1154 #else {
1155 #$suffix = lc($suffix);
1156 #}
[18337]1157
[20413]1158 if ($rename_method eq "url") {
[18319]1159 $tailname = &unicode::url_encode($tailname);
1160 }
1161 elsif ($rename_method eq "base64") {
[18341]1162 $tailname = &unicode::base64_encode($tailname);
[18319]1163 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1164 }
[18326]1165
[18319]1166 $filename = "$tailname$suffix";
[18326]1167 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
[18319]1168
1169 return $filename;
1170}
1171
[21616]1172
1173# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
[21664]1174sub rename_ldb_or_bdb_file {
[18657]1175 my ($filename_no_ext) = @_;
1176
1177 my $new_filename = "$filename_no_ext.gdb";
[21615]1178 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
[18657]1179 # try ldb
1180 my $old_filename = "$filename_no_ext.ldb";
1181
1182 if (-f $old_filename) {
[19056]1183 print STDERR "Renaming $old_filename to $new_filename\n";
1184 rename ($old_filename, $new_filename)
1185 || print STDERR "Rename failed: $!\n";
[18657]1186 return;
1187 }
1188 # try bdb
1189 $old_filename = "$filename_no_ext.bdb";
1190 if (-f $old_filename) {
[19056]1191 print STDERR "Renaming $old_filename to $new_filename\n";
1192 rename ($old_filename, $new_filename)
1193 || print STDERR "Rename failed: $!\n";
[18657]1194 return;
1195 }
1196}
1197
[24874]1198sub os_dir() {
1199
1200 my $gsdlarch = "";
1201 if(defined $ENV{'GSDLARCH'}) {
1202 $gsdlarch = $ENV{'GSDLARCH'};
1203 }
1204 return $ENV{'GSDLOS'}.$gsdlarch;
1205}
[18657]1206
[28997]1207# returns 1 if this (GS server) is a GS3 installation, returns 0 if it's GS2.
1208sub is_gs3() {
1209 if($ENV{'GSDL3SRCHOME'}) {
1210 return 1;
1211 } else {
1212 return 0;
1213 }
1214}
1215
[21719]1216# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1217# By default, /greenstone3 for GS3 or /greenstone for GS2.
1218sub get_greenstone_url_prefix() {
1219 # if already set on a previous occasion, just return that
1220 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1221 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
[18657]1222
[21719]1223 my ($configfile, $urlprefix, $defaultUrlprefix);
1224 my @propertynames = ();
1225
1226 if($ENV{'GSDL3SRCHOME'}) {
1227 $defaultUrlprefix = "/greenstone3";
[27303]1228 $configfile = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
[21719]1229 push(@propertynames, qw/path\s*\=/);
1230 } else {
1231 $defaultUrlprefix = "/greenstone";
[27303]1232 $configfile = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
[21719]1233 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1234 }
1235
1236 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1237
1238 if(!$urlprefix) { # no values found for URL prefix, use default values
1239 $urlprefix = $defaultUrlprefix;
1240 } else {
1241 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1242 $urlprefix =~ s/^\///; # remove the starting slash
1243 my @dirs = split(/(\\|\/)/, $urlprefix);
1244 $urlprefix = shift(@dirs);
1245
1246 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1247 $urlprefix = "/$urlprefix";
1248 }
1249 }
1250
1251 # set for the future
1252 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1253# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1254 return $urlprefix;
1255}
1256
1257
[28460]1258
1259#
[30561]1260# The following comes from activate.pl
[28460]1261#
1262# Designed to work with a server included with GS.
1263# - For GS3, we ask ant for the library URL.
1264# - For GS2, we derive the URL from the llssite.cfg file.
1265
1266sub get_full_greenstone_url_prefix
1267{
[31518]1268 my ($gs_mode, $lib_name) = @_;
1269
[28460]1270 # if already set on a previous occasion, just return that
1271 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
[31517]1272 return $ENV{'FULL_GREENSTONE_URL_PREFIX'} if($ENV{'FULL_GREENSTONE_URL_PREFIX'});
[28460]1273
[31518]1274 # set gs_mode if it was not passed in (servercontrol.pm would pass it in, any other callers won't)
1275 $gs_mode = ($ENV{'GSDL3SRCHOME'}) ? "gs3" : "gs2" unless defined $gs_mode;
[28460]1276
1277 my $url = undef;
1278
1279 if($gs_mode eq "gs2") {
1280 my $llssite_cfg = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "llssite.cfg");
1281
1282 if(-f $llssite_cfg) {
1283 # check llssite.cfg for line with url property
1284 # for server.exe also need to use portnumber and enterlib properties
1285
1286 # Read in the entire contents of the file in one hit
1287 if (!open (FIN, $llssite_cfg)) {
[31515]1288 print STDERR "util::get_full_greenstone_url_prefix() failed to open $llssite_cfg ($!)\n";
[28460]1289 return undef;
1290 }
1291
1292 my $contents;
1293 sysread(FIN, $contents, -s FIN);
1294 close(FIN);
1295
1296 my @lines = split(/[\n\r]+/, $contents); # split on carriage-returns and/or linefeeds
1297 my $enterlib = "";
[30561]1298 my $portnumber = "8282"; # will remain empty (implicit port 80) unless it's specifically been assigned
[28460]1299
1300 foreach my $line (@lines) {
1301 if($line =~ m/^url=(.*)$/) {
1302 $url = $1;
1303 } elsif($line =~ m/^enterlib=(.*)$/) {
1304 $enterlib = $1;
1305 } elsif($line =~ m/^portnumber=(.*)$/) {
1306 $portnumber = $1;
1307 }
1308 }
1309
1310 if(!$url) {
1311 return undef;
1312 }
1313 elsif($url eq "URL_pending") { # library is not running
1314 # do not process url=URL_pending in the file, since for server.exe
1315 # this just means the Enter Library button hasn't been pressed yet
1316 $url = undef;
1317 }
1318 else {
1319 # In the case of server.exe, need to do extra work to get the proper URL
1320 # But first, need to know whether we're indeed dealing with server.exe:
1321
1322 # compare the URL's domain to the full URL
1323 # E.g. for http://localhost:8383/greenstone3/cgi-bin, the domain is localhost:8383
1324 my $uri = URI->new( $url );
1325 my $host = $uri->host;
1326 #print STDERR "@@@@@ host: $host\n";
[31514]1327 if($url =~ m/https?:\/\/$host(\/)?$/) {
[31515]1328 #if($url !~ m/https?:\/\/$host:$portnumber(\/)?/ || $url =~ m/https?:\/\/$host(\/)?$/) {
[28460]1329 # (if the URL does not contain the portnumber, OR if the port is implicitly 80 and)
1330 # If the domain with http:// prefix is completely the same as the URL, assume server.exe
1331 # then the actual URL is the result of suffixing the port and enterlib properties in llssite.cfg
1332 $url = $url.":".$portnumber.$enterlib;
1333 } # else, apache web server
1334
1335 }
1336 }
1337 } elsif($gs_mode eq "gs3") {
1338 # Either check build.properties for tomcat.server, tomcat.port and app.name (and default servlet name).
1339 # app.name is stored in app.path by build.xml. Need to move app.name in build.properties from build.xml
1340
1341 # Or, run the new target get-default-servlet-url
1342 # the output can look like:
1343 #
1344 # Buildfile: build.xml
1345 # [echo] os.name: Windows Vista
1346 #
1347 # get-default-servlet-url:
1348 # [echo] http://localhost:8383/greenstone3/library
1349 # BUILD SUCCESSFUL
1350 # Total time: 0 seconds
1351
1352 #my $output = qx/ant get-default-servlet-url/; # backtick operator, to get STDOUT (else 2>&1)
1353 # - see http://stackoverflow.com/questions/799968/whats-the-difference-between-perls-backticks-system-and-exec
1354
1355 # The get-default-servlet-url ant target can be run from anywhere by specifying the
1356 # location of GS3's ant build.xml buildfile. Activate.pl can be run from anywhere for GS3
1357 # GSDL3SRCHOME will be set for GS3 by gs3-setup.sh, a step that would have been necessary
1358 # to run the activate.pl script in the first place
1359
1360 my $full_build_xml = &FileUtils::javaFilenameConcatenate($ENV{'GSDL3SRCHOME'},"build.xml");
1361
1362 my $perl_command = "ant -buildfile \"$full_build_xml\" get-default-servlet-url";
1363
1364 if (open(PIN, "$perl_command |")) {
1365 while (defined (my $perl_output_line = <PIN>)) {
1366
[31514]1367 if($perl_output_line =~ m@(https?):\/\/(\S*)@) { # grab all the non-whitespace chars
1368 $url="$1://".$2; # preserve the http protocol #$url="http://".$1;
[28460]1369 }
1370 }
1371 close(PIN);
[31518]1372
1373 if (defined $lib_name) { # url won't be undef now
1374 # replace the servlet_name portion of the url found, with the given library_name
1375 $url =~ s@/[^/]*$@/$lib_name@;
1376 }
[28460]1377 } else {
[31515]1378 print STDERR "util::get_full_greenstone_url_prefix() failed to run $perl_command to work out library URL for $gs_mode\n";
[31514]1379 }
[28460]1380 }
1381
1382 # either the url is still undef or it is now set
1383 #print STDERR "\n@@@@@ final URL:|$url|\n" if $url;
1384 #print STDERR "\n@@@@@ URL still undef\n" if !$url;
1385
1386 $ENV{'FULL_GREENSTONE_URL_PREFIX'} = $url;
1387
1388 return $url;
1389}
1390
1391
[21719]1392# Given a config file (xml or java properties file) and a list/array of regular expressions
1393# that represent property names to match on, this function will return the value for the 1st
1394# matching property name. If the return value is undefined, no matching property was found.
1395sub extract_propvalue_from_file() {
1396 my ($configfile, $propertynames) = @_;
1397
1398 my $value;
1399 unless(open(FIN, "<$configfile")) {
1400 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1401 return $value; # not initialised
1402 }
1403
1404 # Read the entire file at once, as one single line, then close it
1405 my $filecontents;
1406 {
1407 local $/ = undef;
1408 $filecontents = <FIN>;
1409 }
1410 close(FIN);
1411
1412 foreach my $regex (@$propertynames) {
1413 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1414 if($value) {
1415 $value =~ s/^\"//; # remove any startquotes
1416 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1417 last; # found value for a matching property, break from loop
1418 }
1419 }
1420
1421 return $value;
1422}
1423
[23306]1424# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1425# given that perllib is in @INC in order to invoke this subroutine.
1426# Call as follows -- after setting up INC to include perllib and
1427# after setting up GSDLHOME and GSDLOS:
1428#
1429# require util;
1430# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1431#
1432sub setup_greenstone_env() {
1433 my ($GSDLHOME, $GSDLOS) = @_;
1434
1435 #my %env_map = ();
1436 # Get the localised ENV settings of running a localised source setup.bash
[23314]1437 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1438 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1439 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
[28375]1440 if (($GSDLOS =~ m/windows/i) && ($^O ne "cygwin")) {
[23314]1441 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1442 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
[23306]1443 }
1444 if (!open(PIN, "$perl_command |")) {
1445 print STDERR ("Unable to execute command: $perl_command. $!\n");
[24563]1446 }
[23306]1447
[29582]1448 my $lastkey;
[23306]1449 while (defined (my $perl_output_line = <PIN>)) {
1450 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
[29582]1451 if(defined $key) {
1452 #$env_map{$key}=$value;
1453 $ENV{$key}=$value;
1454 $lastkey = $key;
1455 } elsif($lastkey && $perl_output_line !~ m/^\s*$/) {
1456 # there was no equals sign in $perl_output_line, so this
1457 # $perl_output_line may be a spillover from the previous
1458 $ENV{$lastkey} = $ENV{$lastkey}."\n".$perl_output_line;
1459 }
[23306]1460 }
[24563]1461 close (PIN);
1462
[23306]1463 # If any keys in $ENV don't occur in Greenstone's localised env
1464 # (stored in $env_map), delete those entries from $ENV
1465 #foreach $key (keys %ENV) {
1466 # if(!defined $env_map{$key}) {
[29708]1467 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{$key}\n";
[23306]1468 # delete $ENV{$key}; # del $ENV(key, value) pair
1469 # }
1470 #}
1471 #undef %env_map;
1472}
1473
[24362]1474sub get_perl_exec() {
1475 my $perl_exec = $^X; # may return just "perl"
1476
1477 if($ENV{'PERLPATH'}) {
[27303]1478 # OR: # $perl_exec = &FileUtils::filenameConcatenate($ENV{'PERLPATH'},"perl");
[28375]1479 if (($ENV{'GSDLOS'} =~ m/windows/) && ($^O ne "cygwin")) {
[24362]1480 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1481 } else {
1482 $perl_exec = "$ENV{'PERLPATH'}/perl";
1483 }
1484 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1485 # containing the full path to the current perl executable we're using
1486 $perl_exec = $Config{perlpath}; # configured path for perl
1487 if (!-e $perl_exec) { # may not point to location on this machine
1488 $perl_exec = $^X; # may return just "perl"
1489 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1490 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1491 }
1492 }
1493 }
1494
1495 return $perl_exec;
1496}
1497
[25533]1498# returns the path to the java command in the JRE included with GS (if any),
1499# quoted to safeguard any spaces in this path, otherwise a simple java
1500# command is returned which assumes and will try for a system java.
[25512]1501sub get_java_command {
1502 my $java = "java";
1503 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1504 # after running setup.bat or from GLI which also runs setup.bat
[27303]1505 my $java_bin = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"packages","jre","bin");
[25512]1506 if(-d $java_bin) {
[27303]1507 $java = &FileUtils::filenameConcatenate($java_bin,"java");
[25533]1508 $java = "\"".$java."\""; # quoted to preserve spaces in path
[25512]1509 }
1510 }
1511 return $java;
1512}
[24362]1513
[25512]1514
[25577]1515# Given the qualified collection name (colgroup/collection),
1516# returns the collection and colgroup parts
1517sub get_collection_parts {
1518 # http://perldoc.perl.org/File/Basename.html
1519 # my($filename, $directories, $suffix) = fileparse($path);
1520 # "$directories contains everything up to and including the last directory separator in the $path
1521 # including the volume (if applicable). The remainder of the $path is the $filename."
1522 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);
1523
1524 my $qualified_collection = shift(@_);
1525
1526 # Since activate.pl can be launched from the command-line, including by a user,
1527 # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1528 # Also allow for the accidental inclusion of multiple slashes
1529 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1530
1531 if(!defined $collection) {
1532 $collection = $colgroup;
1533 $colgroup = "";
1534 }
1535 return ($collection, $colgroup);
1536}
1537
1538# work out the "collectdir/collection" location
1539sub resolve_collection_dir {
1540 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1541
[28211]1542 if (defined $ENV{'GSDLCOLLECTDIR'}) { # a predefined collection dir exists
1543 return $ENV{'GSDLCOLLECTDIR'};
1544 }
1545
[25577]1546 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);
1547
[28175]1548 if (!defined $collect_dir || !$collect_dir) { # if undefined or empty string
1549 $collect_dir = &util::get_working_collect_dir($site);
[25577]1550 }
[28175]1551
1552 return &FileUtils::filenameConcatenate($collect_dir,$colgroup,$collection);
1553}
1554
1555# work out the full path to "collect" of this greenstone 2/3 installation
1556sub get_working_collect_dir {
[28211]1557 my ($site) = @_;
1558
1559 if (defined $ENV{'GSDLCOLLECTHOME'}) { # a predefined collect dir exists
1560 return $ENV{'GSDLCOLLECTHOME'};
[28175]1561 }
[28211]1562
1563 if (defined $site && $site) { # site non-empty, so get default collect dir for GS3
1564
1565 if (defined $ENV{'GSDL3HOME'}) {
1566 return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site,"collect"); # web folder
1567 }
1568 elsif (defined $ENV{'GSDL3SRCHOME'}) {
1569 return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites",$site,"collect");
1570 }
1571 }
1572
[28213]1573 elsif (defined $ENV{'SITEHOME'}) {
[28211]1574 return &FileUtils::filenameConcatenate($ENV{'SITEHOME'},"collect");
[28177]1575 }
1576
1577 else { # get default collect dir for GS2
1578 return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"collect");
1579 }
1580}
1581
[28211]1582sub is_abs_path_any_os {
[28177]1583 my ($path) = @_;
1584
[28211]1585 # We can have filenames in our DBs that were produced on other OS, so this method exists
1586 # to help identify absolute paths in such cases.
1587
[28177]1588 return 1 if($path =~ m@^/@); # full paths begin with forward slash on linux/mac
1589 return 1 if($path =~ m@^([a-zA-Z]\:|\\)@); # full paths begin with drive letter colon for Win or \ for volume, http://stackoverflow.com/questions/13011013/get-only-volume-name-from-filepath
1590
1591 return 0;
1592}
1593
1594
1595# This subroutine is for improving portability of Greenstone collections from one OS to another,
1596# to be used to convert absolute paths going into db files into paths with placeholders instead.
1597# This sub works with util::get_common_gs_paths and takes a path to a greenstone file and, if it's
1598# an absolute path, then it will replace the longest matching greenstone-path prefix of the given
1599# path with a placeholder to match.
1600# The Greenstone-path prefixes that can be matched are the following common Greenstone paths:
1601# the path to the current (specific) collection, the path to the general GS collect directory,
1602# the path to the site directory if GS3, else the path to the GSDLHOME/GSDL3HOME folder.
1603# The longest matching prefix will be replaced with the equivalent placeholder:
1604# @THISCOLLECTPATH@, else @COLLECTHOME@, else @SITEHOME@, else @GSDLHOME@.
1605sub abspath_to_placeholders {
1606 my $path = shift(@_); # path to convert from absolute to one with placeholders
[28236]1607 my $opt_long_or_short_winfilenames = shift(@_) || "short"; # whether we want to force use of long file names even on windows, default uses short
[28177]1608
[28211]1609 return $path unless is_abs_path_any_os($path); # path is relative
[28236]1610
1611 if ($opt_long_or_short_winfilenames eq "long") {
1612 $path = &util::upgrade_if_dos_filename($path); # will only do something on windows
1613 }
[28225]1614
[28177]1615 # now we know we're dealing with absolute paths and have to replace gs prefixes with placeholders
[28213]1616 my @gs_paths = ($ENV{'GSDLCOLLECTDIR'}, $ENV{'GSDLCOLLECTHOME'}, $ENV{'SITEHOME'}, $ENV{'GREENSTONEHOME'}); # list in this order: from longest to shortest path
[28177]1617
[28211]1618 my %placeholder_map = ($ENV{'GREENSTONEHOME'} => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1619 $ENV{'GSDLCOLLECTHOME'} => '@COLLECTHOME@',
1620 $ENV{'GSDLCOLLECTDIR'} => '@THISCOLLECTPATH@'
[28177]1621 );
[28213]1622 $placeholder_map{$ENV{'SITEHOME'}} = '@SITEHOME@' if defined $ENV{'SITEHOME'};
[28177]1623
[28228]1624 $path = &util::_abspath_to_placeholders($path, \@gs_paths, \%placeholder_map);
[28177]1625
[28236]1626 if ($ENV{'GSDLOS'} =~ /^windows$/i && $opt_long_or_short_winfilenames eq "short") {
[28228]1627 # for windows need to look for matches on short file names too
1628 # matched paths are again to be replaced with the usual placeholders
1629
1630 my $gsdlcollectdir = &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTDIR'});
1631 my $gsdlcollecthome = &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTHOME'});
1632 my $sitehome = (defined $ENV{'SITEHOME'}) ? &util::downgrade_if_dos_filename($ENV{'SITEHOME'}) : undef;
1633 my $greenstonehome = &util::downgrade_if_dos_filename($ENV{'GREENSTONEHOME'});
1634
1635 @gs_paths = ($gsdlcollectdir, $gsdlcollecthome, $sitehome, $greenstonehome); # order matters
1636
1637 %placeholder_map = ($greenstonehome => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1638 $gsdlcollecthome => '@COLLECTHOME@',
1639 $gsdlcollectdir => '@THISCOLLECTPATH@'
1640 );
1641 $placeholder_map{$sitehome} = '@SITEHOME@' if defined $sitehome;
1642
1643 $path = &util::_abspath_to_placeholders($path, \@gs_paths, \%placeholder_map);
1644 }
1645
1646 return $path;
1647}
1648
1649sub _abspath_to_placeholders {
1650 my ($path, $gs_paths_ref, $placeholder_map_ref) = @_;
1651
[28177]1652 # The sequence of elements in @gs_paths matters
1653 # Need to loop starting from the *longest* matching path (the path to the specific collection)
1654 # to the shortest matching path (the path to gsdlhome/gsdl3home folder):
1655
[28228]1656 foreach my $gs_path (@$gs_paths_ref) {
[28213]1657 next if(!defined $gs_path); # site undefined for GS2
[28211]1658
[28177]1659 my $re_path = &util::filename_to_regex($gs_path); # escape for regex
1660
[28211]1661 if($path =~ m/^$re_path/i) { # case sensitive or not for OS?
[28177]1662
[28228]1663 my $placeholder = $placeholder_map_ref->{$gs_path}; # get the placeholder to replace the matched path with
[28177]1664
1665 $path =~ s/^$re_path/$placeholder/; #case sensitive or not?
1666 #$path =~ s/^[\\\/]//; # remove gs_path's trailing separator left behind at the start of the path
[28604]1667 # lowercase file extension, This is needed when shortfilenames are used, as case affects alphetical ordering, which affects diffcol
1668 $path =~ s/\.([A-Z]+)$/".".lc($1)/e;
[28177]1669 last; # done
1670 }
1671 }
1672
1673 return $path;
1674}
1675
1676# Function that does the reverse of the util::abspath_to_placeholders subroutine
1677# Once again, call this with the values returned from util::get_common_gs_paths
1678sub placeholders_to_abspath {
1679 my $path = shift(@_); # path that can contain placeholders to convert to resolved absolute path
[28236]1680 my $opt_long_or_short_winfilenames = shift(@_) || "short"; # whether we want to force use of long file names even on windows, default uses short
[28177]1681
1682 return $path if($path !~ m/@/); # path contains no placeholders
1683
[28213]1684 # replace placeholders with gs prefixes
[28177]1685 my @placeholders = ('@THISCOLLECTPATH@', '@COLLECTHOME@', '@SITEHOME@', '@GSDLHOME@'); # order of paths not crucial in this case,
1686 # but listed here from longest to shortest once placeholders are have been resolved
1687
[28213]1688 # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
[28228]1689 my %placeholder_to_gspath_map;
[28236]1690 if ($ENV{'GSDLOS'} =~ /^windows$/i && $opt_long_or_short_winfilenames eq "short") {
[28228]1691 # always replace placeholders with short file names of the absolute paths on windows?
1692 %placeholder_to_gspath_map = ('@GSDLHOME@' => &util::downgrade_if_dos_filename($ENV{'GREENSTONEHOME'}),
1693 '@COLLECTHOME@' => &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTHOME'}),
1694 '@THISCOLLECTPATH@' => &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTDIR'})
1695 );
1696 $placeholder_to_gspath_map{'@SITEHOME@'} = &util::downgrade_if_dos_filename($ENV{'SITEHOME'}) if defined $ENV{'SITEHOME'};
1697 } else {
1698 %placeholder_to_gspath_map = ('@GSDLHOME@' => $ENV{'GREENSTONEHOME'},
1699 '@SITEHOME@' => $ENV{'SITEHOME'}, # can be undef
1700 '@COLLECTHOME@' => $ENV{'GSDLCOLLECTHOME'},
1701 '@THISCOLLECTPATH@' => $ENV{'GSDLCOLLECTDIR'}
1702 ); # $placeholder_to_gspath_map{'@SITEHOME@'} = $ENV{'SITEHOME'} if defined $ENV{'SITEHOME'};
1703 }
[28177]1704
1705 foreach my $placeholder (@placeholders) {
1706 my $gs_path = $placeholder_to_gspath_map{$placeholder};
1707
[28213]1708 next if(!defined $gs_path); # sitehome for GS2 is undefined
[28177]1709
1710 if($path =~ m/^$placeholder/) {
1711 $path =~ s/^$placeholder/$gs_path/;
1712 last; # done
1713 }
1714 }
1715
1716 return $path;
1717}
1718
[25994]1719# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
1720# a directory containing sequentially numbered images.
1721sub create_itemfile
1722{
1723 my ($output_dir, $convert_basename, $convert_to) = @_;
[27303]1724 my $page_num = "";
[25994]1725
[27303]1726 opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";
[25994]1727 my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
[27303]1728 closedir DIR;
[25994]1729
[27303]1730 # Sort files in the directory by page_num
[25994]1731 sub page_number {
1732 my ($dir) = @_;
[27970]1733 my ($pagenum) =($dir =~ m/^.*?[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
1734# my ($pagenum) =($dir =~ m/(\d+)(\.(jpg|gif|png))?$/i); # this works but is not as safe/strict about input filepatterns as the above
[25994]1735
1736 $pagenum = 1 unless defined $pagenum;
1737 return $pagenum;
1738 }
1739
[27303]1740 # sort the files in the directory in the order of page_num rather than lexically.
[25994]1741 @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
1742
1743 # work out if the numbering of the now sorted image files starts at 0 or not
1744 # by checking the number of the first _image_ file (skipping item files)
1745 my $starts_at_0 = 0;
1746 my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
1747 if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
1748 $starts_at_0 = 1;
1749 }
1750
[27303]1751 my $item_file = &FileUtils::filenameConcatenate($output_dir, $convert_basename.".item");
1752 my $item_fh;
1753 &FileUtils::openFileHandle($item_file, 'w', \$item_fh);
1754 print $item_fh "<PagedDocument>\n";
[25994]1755
1756 foreach my $file (@dir_files){
1757 if ($file !~ /\.item/i){
1758 $page_num = page_number($file);
1759 $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
[27303]1760 print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
1761 }
[25994]1762 }
1763
[27303]1764 print $item_fh "</PagedDocument>\n";
1765 &FileUtils::closeFileHandle($item_file, \$item_fh);
[25994]1766 return $item_file;
1767}
1768
[28560]1769# Sets the gnomelib_env. Based on the logic in wvware.pl which can perhaps be replaced with a call to this function in future
1770sub set_gnomelib_env
1771{
1772 ## SET THE ENVIRONMENT AS DONE IN SETUP.BASH/BAT OF GNOME-LIB
1773 # Though this is only needed for darwin Lion at this point (and android, though that is untested)
[27303]1774
[28560]1775 my $libext = "so";
1776 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1777 return;
1778 } elsif ($ENV{'GSDLOS'} =~ m/^darwin$/i) {
1779 $libext = "dylib";
1780 }
1781
1782 if (!defined $ENV{'GEXTGNOME'}) {
1783 ##print STDERR "@@@ Setting GEXTGNOME env\n";
1784
1785 my $gnome_dir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"ext","gnome-lib-minimal");
1786
1787 if(! -d $gnome_dir) {
1788 $gnome_dir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"ext","gnome-lib");
1789
1790 if(! -d $gnome_dir) {
1791 $gnome_dir = "";
1792 }
1793 }
1794
1795 # now set other the related env vars,
1796 # IF we've found the gnome-lib dir installed in the ext folder
1797
1798 if ($gnome_dir ne "" && -f &FileUtils::filenameConcatenate($gnome_dir, $ENV{'GSDLOS'}, "lib", "libiconv.$libext")) {
1799 $ENV{'GEXTGNOME'} = $gnome_dir;
1800 $ENV{'GEXTGNOME_INSTALLED'}=&FileUtils::filenameConcatenate($ENV{'GEXTGNOME'}, $ENV{'GSDLOS'});
1801
1802 my $gnomelib_bin = &FileUtils::filenameConcatenate($ENV{'GEXTGNOME_INSTALLED'}, "bin");
1803 if(-d $gnomelib_bin) { # no bin subfolder in GS binary's cutdown gnome-lib-minimal folder
1804 &util::envvar_prepend("PATH", $gnomelib_bin);
1805 }
1806
1807 # util's prepend will create LD/DYLD_LIB_PATH if it doesn't exist yet
1808 my $gextlib = &FileUtils::filenameConcatenate($ENV{'GEXTGNOME_INSTALLED'}, "lib");
1809
1810 if($ENV{'GSDLOS'} eq "linux") {
1811 &util::envvar_prepend("LD_LIBRARY_PATH", $gextlib);
1812 }
1813 elsif ($ENV{'GSDLOS'} eq "darwin") {
1814 #&util::envvar_prepend("DYLD_LIBRARY_PATH", $gextlib);
1815 &util::envvar_prepend("DYLD_FALLBACK_LIBRARY_PATH", $gextlib);
1816 }
1817 }
1818
1819 # Above largely mimics the setup.bash of the gnome-lib-minimal.
1820 # Not doing the devel-srcpack that gnome-lib-minimal's setup.bash used to set
1821 # Not exporting GSDLEXTS variable either
1822 }
1823
1824# print STDERR "@@@@@ GEXTGNOME: ".$ENV{'GEXTGNOME'}."\n\tINSTALL".$ENV{'GEXTGNOME_INSTALLED'}."\n";
1825# print STDERR "\tPATH".$ENV{'PATH'}."\n";
1826# print STDERR "\tLD_LIB_PATH".$ENV{'LD_LIBRARY_PATH'}."\n" if $ENV{'LD_LIBRARY_PATH};
1827# print STDERR "\tDYLD_FALLBACK_LIB_PATH".$ENV{'DYLD_FALLBACK_LIBRARY_PATH'}."\n" if $ENV{'DYLD_FALLBACK_LIBRARY_PATH};
1828
1829 # if no GEXTGNOME, maybe users didn't need gnome-lib to run gnomelib/libiconv dependent binaries like hashfile, suffix, wget
1830 # (wvware is launched in a gnomelib env from its own script, but could possibly go through this script in future)
1831}
1832
1833
1834
[27374]1835## @function augmentINC()
1836#
1837# Prepend a path (if it exists) onto INC but only if it isn't already in INC
1838# @param $new_path The path to add
1839# @author jmt12
1840#
[27303]1841sub augmentINC
1842{
1843 my ($new_path) = @_;
1844 my $did_add_path = 0;
[27374]1845 # might need to be replaced with FileUtils::directoryExists() call eventually
[27303]1846 if (-d $new_path)
1847 {
1848 my $did_find_path = 0;
1849 foreach my $existing_path (@INC)
1850 {
1851 if ($existing_path eq $new_path)
1852 {
1853 $did_find_path = 1;
1854 last;
1855 }
1856 }
1857 if (!$did_find_path)
1858 {
1859 unshift(@INC, $new_path);
1860 $did_add_path = 1;
1861 }
1862 }
1863 return $did_add_path;
1864}
[27374]1865## augmentINC()
[27303]1866
[27374]1867
[4]18681;
Note: See TracBrowser for help on using the repository browser.