source: main/trunk/greenstone2/perllib/util.pm@ 30574

Last change on this file since 30574 was 30574, checked in by ak19, 8 years ago

Improving dispersed GS3: changes to util.pm fix the bug whereby the perl code always write to gs2build\tmp when building, even if the Greenstone folder is not writable. Now the TMP env var location is used and the tmp files get written to TMP\greenstone\web when the GS folder is not writable. Further tidied up bat and cmd files to remove extra variables, to break out of loop reading in build.properties file when the necessary properties are found.

  • Property svn:keywords set to Author Date Id Revision
File size: 61.8 KB
RevLine 
[537]1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package util;
27
[23362]28use strict;
[27509]29use FileUtils;
[23362]30
31use Encode;
[29807]32use Unicode::Normalize 'normalize';
33
[4]34use File::Copy;
[619]35use File::Basename;
[24362]36# Config for getting the perlpath in the recommended way, though it uses paths that are
37# hard-coded into the Config file that's generated upon configuring and compiling perl.
38# $^X works better in some cases to return the path to perl used to launch the script,
39# but if launched with plain "perl" (no full-path), that will be just what it returns.
[27303]40use Config;
41# New module for file related utility functions - intended as a
42# placeholder for an extension that allows a variety of different
43# filesystems (FTP, HTTP, SAMBA, WEBDav, HDFS etc)
44use FileUtils;
[4]45
[28226]46if ($ENV{'GSDLOS'} =~ /^windows$/i) {
[28225]47 require Win32; # for working out Windows Long Filenames from Win 8.3 short filenames
48}
49
[4]50# removes files (but not directories)
51sub rm {
[27303]52 warnings::warnif("deprecated", "util::rm() is deprecated, using FileUtils::removeFiles() instead");
53 return &FileUtils::removeFiles(@_);
[4]54}
55
56# recursive removal
[10211]57sub filtered_rm_r {
[27303]58 warnings::warnif("deprecated", "util::filtered_rm_r() is deprecated, using FileUtils::removeFilesFiltered() instead");
59 return &FileUtils::removeFilesFiltered(@_);
[4]60}
61
[10211]62# recursive removal
63sub rm_r {
[27326]64 warnings::warnif("deprecated", "util::rm_r() is deprecated, using FileUtils::removeFilesRecursive() instead");
[27303]65 return &FileUtils::removeFilesRecursive(@_);
[10211]66}
67
[721]68# moves a file or a group of files
69sub mv {
[27303]70 warnings::warnif("deprecated", "util::mv() is deprecated, using FileUtils::moveFiles() instead");
71 return &FileUtils::moveFiles(@_);
[721]72}
73
[25554]74# Move the contents of source directory into target directory
75# (as opposed to merely replacing target dir with the src dir)
76# This can overwrite any files with duplicate names in the target
77# but other files and folders in the target will continue to exist
78sub mv_dir_contents {
[27303]79 warnings::warnif("deprecated", "util::mv_dir_contents() is deprecated, using FileUtils::moveDirectoryContents() instead");
80 return &FileUtils::moveDirectoryContents(@_);
[25554]81}
[721]82
[4]83# copies a file or a group of files
84sub cp {
[27303]85 warnings::warnif("deprecated", "util::cp() is deprecated, using FileUtils::copyFiles() instead");
86 return &FileUtils::copyFiles(@_);
[4]87}
88
89# recursively copies a file or group of files
[1454]90# syntax: cp_r (sourcefiles, destination directory)
91# destination must be a directory - to copy one file to
92# another use cp instead
[4]93sub cp_r {
[27303]94 warnings::warnif("deprecated", "util::cp_r() is deprecated, using FileUtils::copyFilesrecursive() instead");
95 return &FileUtils::copyFilesRecursive(@_);
96}
[4]97
[21762]98# recursively copies a file or group of files
99# syntax: cp_r (sourcefiles, destination directory)
100# destination must be a directory - to copy one file to
101# another use cp instead
102sub cp_r_nosvn {
[27303]103 warnings::warnif("deprecated", "util::cp_r_nosvn() is deprecated, using FileUtils::copyFilesRecursiveNoSVN() instead");
104 return &FileUtils::copyFilesRecursiveNoSVN(@_);
[21762]105}
106
[11179]107# copies a directory and its contents, excluding subdirectories, into a new directory
108sub cp_r_toplevel {
[27303]109 warnings::warnif("deprecated", "util::cp_r_toplevel() is deprecated, using FileUtils::recursiveCopyTopLevel() instead");
110 return &FileUtils::recursiveCopyTopLevel(@_);
[11179]111}
112
[721]113sub mk_dir {
[27303]114 warnings::warnif("deprecated", "util::mk_dir() is deprecated, using FileUtils::makeDirectory() instead");
115 return &FileUtils::makeDirectory(@_);
[721]116}
117
[1046]118# in case anyone cares - I did some testing (using perls Benchmark module)
119# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
120# slightly faster (surprisingly) - Stefan.
[4]121sub mk_all_dir {
[27303]122 warnings::warnif("deprecated", "util::mk_all_dir() is deprecated, using FileUtils::makeAllDirectories() instead");
123 return &FileUtils::makeAllDirectories(@_);
[4]124}
125
[619]126# make hard link to file if supported by OS, otherwise copy the file
127sub hard_link {
[27303]128 warnings::warnif("deprecated", "util::hard_link() is deprecated, using FileUtils::hardLink() instead");
129 return &FileUtils::hardLink(@_);
[619]130}
131
[2193]132# make soft link to file if supported by OS, otherwise copy file
[721]133sub soft_link {
[27303]134 warnings::warnif("deprecated", "util::soft_link() is deprecated, using FileUtils::softLink() instead");
135 return &FileUtils::softLink(@_);
[721]136}
137
[23362]138# Primarily for filenames generated by processing
139# content of HTML files (which are mapped to UTF-8 internally)
140#
141# To turn this into an octet string that really exists on the file
142# system:
143# 1. don't need to do anything special for Unix-based systems
144# (as underlying file system is byte-code)
145# 2. need to map to short DOS filenames for Windows
[721]146
[23362]147sub utf8_to_real_filename
148{
149 my ($utf8_filename) = @_;
[721]150
[23362]151 my $real_filename;
[721]152
[28375]153 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]154 require Win32;
[23388]155
[23362]156 my $unicode_filename = decode("utf8",$utf8_filename);
157 $real_filename = Win32::GetShortPathName($unicode_filename);
158 }
159 else {
160 $real_filename = $utf8_filename;
161 }
162
163 return $real_filename;
164}
165
[29794]166sub raw_filename_to_unicode
167{
[29810]168 my ($directory, $raw_file, $filename_encoding ) = @_;
[29794]169
[29810]170 my $unicode_filename = $raw_file;
171 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[29816]172 # Try turning a short version to the long version
173 # If there are "funny" characters in the file name, that can't be represented in the ANSI code, then we will have a short weird version, eg E74~1.txt
174 $unicode_filename = &util::get_dirsep_tail(&util::upgrade_if_dos_filename(&FileUtils::filenameConcatenate($directory, $raw_file), 0));
175
176
177 if ($unicode_filename eq $raw_file) {
178 # This means the original filename *was* able to be encoded in the local ANSI file encoding (eg windows_1252), so now we turn it back to perl's unicode
[29810]179
[29816]180 $unicode_filename = &Encode::decode(locale_fs => $unicode_filename);
181 }
182 # else This means we did have one of the funny filenames. the getLongPathName (used in upgrade_if_dos_filename) will return unicode, so we don't need to do anything more.
183
[29810]184
185 } else {
[29816]186 # we had a utf-8 string, turn it into perl internal unicode
187 $unicode_filename = &Encode::decode("utf-8", $unicode_filename);
[29810]188
189
[29816]190 }
191 #Does the filename have url encoded chars in it?
192 if (&unicode::is_url_encoded($unicode_filename)) {
193 $unicode_filename = &unicode::url_decode($unicode_filename);
194 }
195
[29794]196 # Normalise the filename to canonical composition - on mac, filenames use decopmposed form for accented chars
197 if ($ENV{'GSDLOS'} =~ m/^darwin$/i) {
[29816]198 $unicode_filename = normalize('C', $unicode_filename); # Composed form 'C'
199 }
[29794]200 return $unicode_filename;
201
202}
[27303]203sub fd_exists {
204 warnings::warnif("deprecated", "util::fd_exists() is deprecated, using FileUtils::fileTest() instead");
205 return &FileUtils::fileTest(@_);
[23362]206}
207
[27303]208sub file_exists {
209 warnings::warnif("deprecated", "util::file_exists() is deprecated, using FileUtils::fileExists() instead");
210 return &FileUtils::fileExists(@_);
[23362]211}
212
[27303]213sub dir_exists {
214 warnings::warnif("deprecated", "util::dir_exists() is deprecated, using FileUtils::directoryExists() instead");
215 return &FileUtils::directoryExists(@_);
[23362]216}
217
[4]218# updates a copy of a directory in some other part of the filesystem
219# verbosity settings are: 0=low, 1=normal, 2=high
220# both $fromdir and $todir should be absolute paths
221sub cachedir {
[27303]222 warnings::warnif("deprecated", "util::cachedir() is deprecated, using FileUtils::synchronizeDirectories() instead");
223 return &FileUtils::synchronizeDirectories(@_);
[4]224}
225
226# this function returns -1 if either file is not found
227# assumes that $file1 and $file2 are absolute file names or
228# in the current directory
229# $file2 is allowed to be newer than $file1
230sub differentfiles {
[27303]231 warnings::warnif("deprecated", "util::differentfiles() is deprecated, using FileUtils::differentFiles() instead");
232 return &FileUtils::differentFiles(@_);
[4]233}
234
235
[30574]236# works out the temporary directory, including in the case where Greenstone is not writable
237# In that case, gs3-setup.bat would already have set the GS_TMP_OUTPUT_DIR temp variable
238sub determine_tmp_dir
239{
240 my $try_collect_dir = shift(@_) || 0;
241
242 my $tmp_dirname;
243 if(defined $ENV{'GS_TMP_OUTPUT_DIR'}) {
244 $tmp_dirname = $ENV{'GS_TMP_OUTPUT_DIR'};
245 } elsif($try_collect_dir && defined $ENV{'GSDLCOLLECTDIR'}) {
246 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
247 } elsif(defined $ENV{'GSDLHOME'}) {
248 $tmp_dirname = $ENV{'GSDLHOME'};
249 } else {
250 return undef;
251 }
252
253 if(!defined $ENV{'GS_TMP_OUTPUT_DIR'}) {
254 # test the tmp_dirname folder is writable, by trying to write out a file
255 # Unfortunately, cound not get if(-w $dirname) to work on directories on Windows
256 ## http://alvinalexander.com/blog/post/perl/perl-file-test-operators-reference-cheat-sheet (test file/dir writable)
257 ## http://www.makelinux.net/alp/083 (real and effective user IDs)
258
259 my $tmp_test_file = &FileUtils::filenameConcatenate($tmp_dirname, "writability_test.tmp");
260 if (open (FOUT, $tmp_test_file)) {
261 close(FOUT);
262 &FileUtils::removeFiles($tmp_test_file);
263 } else { # location not writable, use TMP location
264 $tmp_dirname = &FileUtils::filenameConcatenate($ENV{'TMP'}, "greenstone");
265 $ENV{'GS_TMP_OUTPUT_DIR'} = $tmp_dirname; # store for next time
266 }
267 }
268
269 $tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, "tmp");
270 &FileUtils::makeAllDirectories ($tmp_dirname) unless -e $tmp_dirname;
271
272 return $tmp_dirname;
273}
274
[16266]275sub get_tmp_filename
276{
277 my $file_ext = shift(@_) || undef;
278
[22438]279 my $opt_dot_file_ext = "";
280 if (defined $file_ext) {
281 if ($file_ext !~ m/\./) {
282 # no dot, so needs one added in at start
283 $opt_dot_file_ext = ".$file_ext"
284 }
285 else {
286 # allow for "extensions" such as _metadata.txt to be handled
287 # gracefully
288 $opt_dot_file_ext = $file_ext;
289 }
290 }
[16266]291
[30574]292 my $tmpdir = &util::determine_tmp_dir(0);
[4]293
294 my $count = 1000;
295 my $rand = int(rand $count);
[27303]296 my $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
[16266]297
298 while (-e $full_tmp_filename) {
[4]299 $rand = int(rand $count);
[27303]300 $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
[4]301 $count++;
302 }
[16266]303
304 return $full_tmp_filename;
[4]305}
306
[28066]307# These 2 are "static" variables used by the get_timestamped_tmp_folder() subroutine below and
308# belong with that function. They help ensure the timestamped tmp folders generated are unique.
309my $previous_timestamp = undef;
310my $previous_timestamp_f = 0; # frequency
311
[22886]312sub get_timestamped_tmp_folder
[22873]313{
[30574]314 my $tmp_dirname = &util::determine_tmp_dir(1);
[28066]315
[22873]316 # add the timestamp into the path otherwise we can run into problems
317 # if documents have the same name
[28066]318 my $timestamp = time;
319
320 if (!defined $previous_timestamp || ($timestamp > $previous_timestamp)) {
321 $previous_timestamp_f = 0;
322 $previous_timestamp = $timestamp;
323 } else {
324 $previous_timestamp_f++;
325 }
326
[27303]327 my $time_tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, $timestamp);
[28066]328 $tmp_dirname = $time_tmp_dirname;
329 my $i = $previous_timestamp_f;
330
331 if($previous_timestamp_f > 0) {
332 $tmp_dirname = $time_tmp_dirname."_".$i;
333 $i++;
334 }
[22873]335 while (-e $tmp_dirname) {
[28066]336 $tmp_dirname = $time_tmp_dirname."_".$i;
[22873]337 $i++;
338 }
[28066]339 &FileUtils::makeDirectory($tmp_dirname);
340
[22886]341 return $tmp_dirname;
342}
[22873]343
[22886]344sub get_timestamped_tmp_filename_in_collection
345{
346
347 my ($input_filename, $output_ext) = @_;
348 # derive tmp filename from input filename
349 my ($tailname, $dirname, $suffix)
350 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
351
352 # softlink to collection tmp dir
353 my $tmp_dirname = &util::get_timestamped_tmp_folder();
354 $tmp_dirname = $dirname unless defined $tmp_dirname;
355
[22873]356 # following two steps copied from ConvertBinaryFile
[22886]357 # do we need them?? can't use them as is, as they use plugin methods.
358
[22873]359 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
360
361 # URLEncode this since htmls with images where the html filename is utf8 don't seem
362 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
363 # files on the filesystem.
364 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
365 if (defined $output_ext) {
366 $output_ext = ".$output_ext"; # add the dot
367 } else {
368 $output_ext = $suffix;
369 }
370 $output_ext= lc($output_ext);
[27303]371 my $tmp_filename = &FileUtils::filenameConcatenate($tmp_dirname, "$tailname$output_ext");
[22873]372
373 return $tmp_filename;
374}
375
[21218]376sub get_toplevel_tmp_dir
377{
[27303]378 return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "tmp");
[21218]379}
380
381
[28460]382sub get_collectlevel_tmp_dir
383{
384 my $tmp_dirname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "tmp");
385 &FileUtils::makeDirectory($tmp_dirname) if (!-e $tmp_dirname);
386
387 return $tmp_dirname;
388}
389
390
[17512]391sub filename_to_regex {
392 my $filename = shift (@_);
[4]393
[24971]394 # need to make single backslashes double so that regex works
[24832]395 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
[24829]396
[24832]397 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
398 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
[24829]399 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
400 $filename =~ s@\(@\\(@g; # escape brackets
401 $filename =~ s@\)@\\)@g; # escape brackets
[24932]402 $filename =~ s@\[@\\[@g; # escape brackets
403 $filename =~ s@\]@\\]@g; # escape brackets
[24829]404
[17512]405 return $filename;
406}
407
[24829]408sub unregex_filename {
409 my $filename = shift (@_);
410
411 # need to put doubled backslashes for regex back to single
412 $filename =~ s/\\\./\./g; # remove RE syntax for .
413 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
414 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
[24932]415 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
416 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
[24940]417
418 # \\ goes to \
419 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
420 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
[24829]421 return $filename;
422}
423
[4]424sub filename_cat {
[27303]425 # I've disabled this warning for now, as every Greenstone perl
426 # script seems to make use of this function and so you drown in a
427 # sea of deprecated warnings [jmt12]
428# warnings::warnif("deprecated", "util::filename_cat() is deprecated, using FileUtils::filenameConcatenate() instead");
429 return &FileUtils::filenameConcatenate(@_);
[4]430}
431
[21413]432
[28394]433sub _pathname_cat {
434 my $join_char = shift(@_);
435 my $first_path = shift(@_);
[21413]436 my (@pathnames) = @_;
437
438 # If first_path is not null or empty, then add it back into the list
439 if (defined $first_path && $first_path =~ /\S/) {
440 unshift(@pathnames, $first_path);
441 }
442
443 my $pathname = join($join_char, @pathnames);
444
445 # remove duplicate slashes
[28394]446 if ($join_char eq ";") {
[21413]447 $pathname =~ s/[\\\/]+/\\/g;
[28460]448 if ($^O eq "cygwin") {
449 # Once we've collapsed muliple (potentialy > 2) slashes
450 # For cygwin, actually want things double-backslahed
451 $pathname =~ s/\\/\\\\/g;
452 }
453
[21413]454 } else {
455 $pathname =~ s/[\/]+/\//g;
456 # DB: want a pathname abc\de.html to remain like this
457 }
458
459 return $pathname;
460}
461
462
[28394]463sub pathname_cat {
464 my (@pathnames) = @_;
465
466 my $join_char;
467 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
468 $join_char = ";";
469 } else {
470 $join_char = ":";
471 }
472 return _pathname_cat($join_char,@pathnames);
473}
474
475
476sub javapathname_cat {
477 my (@pathnames) = @_;
478
479 my $join_char;
480
481 # Unlike pathname_cat() above, not interested if running in a Cygwin environment
482 # This is because the java we run is actually a native Windows executable
483
484 if (($ENV{'GSDLOS'} =~ /^windows$/i)) {
485 $join_char = ";";
486 } else {
487 $join_char = ":";
488 }
489 return _pathname_cat($join_char,@pathnames);
490}
491
492
[28460]493sub makeFilenameJavaCygwinCompatible
494{
495 my ($java_filename) = @_;
[28394]496
[28460]497 if ($^O eq "cygwin") {
498 # To be used with a Java program, but under Cygwin
499 # Because the java binary that is native to Windows, need to
500 # convert the Cygwin paths (i.e. Unix style) to be Windows
501 # compatible
502
503 $java_filename = `cygpath -wp "$java_filename"`;
504 chomp($java_filename);
505 $java_filename =~ s%\\%\\\\%g;
506 }
507
508 return $java_filename;
509}
510
[19616]511sub tidy_up_oid {
512 my ($OID) = @_;
[29112]513 if ($OID =~ /[\.\/\\]/) {
514 print STDERR "Warning, identifier $OID contains periods or slashes(.\\\/), replacing them with _\n";
515 $OID =~ s/[\.\\\/]/_/g; #remove any periods
[19616]516 }
517 if ($OID =~ /^\s.*\s$/) {
518 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
519 # remove starting and trailing whitespace
520 $OID =~ s/^\s+//;
521 $OID =~ s/\s+$//;
522 }
523 if ($OID =~ /^[\d]*$/) {
524 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
525 $OID = "D" . $OID;
526 }
527
528 return $OID;
529}
[26206]530
[10212]531sub envvar_prepend {
532 my ($var,$val) = @_;
533
[26206]534 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
[28375]535## my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
[26206]536
[28375]537 # Rewritten above to make ":" the default (Windows is the special
538 # case, anything else 'unusual' such as Solaris etc is Unix)
539 my $pathsep = (defined $ENV{'GSDLOS'} && (($ENV{'GSDLOS'} =~ m/windows/) && ($^O ne "cygwin"))) ? ";" : ":";
540
[16404]541 # do not prepend any value/path that's already in the environment variable
[24832]542
543 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
544 if (!defined($ENV{$var})) {
545 $ENV{$var} = "$val";
[16442]546 }
[24832]547 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]548 $ENV{$var} = "$val".$pathsep.$ENV{$var};
[10212]549 }
550}
551
552sub envvar_append {
553 my ($var,$val) = @_;
[26206]554
555 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
556 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
[24832]557
[16404]558 # do not append any value/path that's already in the environment variable
[26206]559
[24832]560 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
561 if (!defined($ENV{$var})) {
562 $ENV{$var} = "$val";
[16442]563 }
[24832]564 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]565 $ENV{$var} = $ENV{$var}.$pathsep."$val";
[24832]566 }
[10212]567}
568
[16442]569
[28394]570
[16380]571# splits a filename into a prefix and a tail extension using the tail_re, or
572# if that fails, splits on the file_extension . (dot)
573sub get_prefix_and_tail_by_regex {
[10212]574
[16380]575 my ($filename,$tail_re) = @_;
576
577 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
578 if ((!defined $file_prefix) || (!defined $file_ext)) {
579 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
580 }
581
582 return ($file_prefix,$file_ext);
583}
584
585# get full path and file only path from a base_dir (which may be empty) and
586# file (which may contain directories)
587sub get_full_filenames {
588 my ($base_dir, $file) = @_;
589
[28375]590# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
591# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
592# print STDERR "** Calling method: $lcfilename:$cline $cpackage->$csubr\n";
593
594
[16380]595 my $filename_full_path = $file;
596 # add on directory if present
[27303]597 $filename_full_path = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\S/;
[16380]598
599 my $filename_no_path = $file;
600
601 # remove directory if present
602 $filename_no_path =~ s/^.*[\/\\]//;
603 return ($filename_full_path, $filename_no_path);
604}
605
[8682]606# returns the path of a file without the filename -- ie. the directory the file is in
607sub filename_head {
608 my $filename = shift(@_);
609
[28375]610 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[8682]611 $filename =~ s/[^\\\\]*$//;
612 }
613 else {
614 $filename =~ s/[^\\\/]*$//;
615 }
616
617 return $filename;
618}
619
620
[23362]621
[1454]622# returns 1 if filename1 and filename2 point to the same
623# file or directory
624sub filenames_equal {
625 my ($filename1, $filename2) = @_;
626
627 # use filename_cat to clean up trailing slashes and
628 # multiple slashes
[27303]629 $filename1 = &FileUtils::filenameConcatenate($filename1);
630 $filename2 = &FileUtils::filenameConcatenate($filename2);
[1454]631
632 # filenames not case sensitive on windows
633 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
634 $filename1 =~ tr/[A-Z]/[a-z]/;
635 $filename2 =~ tr/[A-Z]/[a-z]/;
636 }
637 return 1 if $filename1 eq $filename2;
638 return 0;
639}
640
[24932]641# If filename is relative to within_dir, returns the relative path of filename to that directory
642# with slashes in the filename returned as they were in the original (absolute) filename.
[23362]643sub filename_within_directory
644{
645 my ($filename,$within_dir) = @_;
646
[23371]647 if ($within_dir !~ m/[\/\\]$/) {
648 my $dirsep = &util::get_dirsep();
[23362]649 $within_dir .= $dirsep;
650 }
651
[24829]652 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
[23362]653 if ($filename =~ m/^$within_dir(.*)$/) {
654 $filename = $1;
655 }
656
657 return $filename;
658}
659
[24932]660# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
661# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
662# The subpath returned will also be a URL type filename.
663sub filename_within_directory_url_format
664{
665 my ($filename,$within_dir) = @_;
666
667 # convert parameters only to / slashes if Windows
668
[24971]669 my $filename_urlformat = &filepath_to_url_format($filename);
670 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
671
[24932]672 #if ($within_dir_urlformat !~ m/\/$/) {
673 # make sure directory ends with a slash
674 #$within_dir_urlformat .= "/";
675 #}
676
677 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
678
679 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
680
681 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
682 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
683 $filename_urlformat = $1;
684 }
685
686 return $filename_urlformat;
687}
688
[24971]689# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
690# since on Linux it doesn't represent a file separator but an escape char).
691sub filepath_to_url_format
692{
693 my ($filepath) = @_;
[28375]694 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[24971]695 # Only need to worry about Windows, as Unix style directories already in url-format
696 # Convert Windows style \ => /
697 $filepath =~ s@\\@/@g;
698 }
699 return $filepath;
700}
[24932]701
[25093]702# regex filepaths on windows may include \\ as path separator. Convert \\ to /
703sub filepath_regex_to_url_format
704{
705 my ($filepath) = @_;
[28375]706 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[25093]707 # Only need to worry about Windows, as Unix style directories already in url-format
708 # Convert Windows style \\ => /
709 $filepath =~ s@\\\\@/@g;
710 }
711 return $filepath;
712
713}
[24971]714
[25093]715# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
716# and ignores trailing /
717# returns (file, dirs) dirs will be empty if no subdirs
718sub url_fileparse
719{
720 my ($filepath) = @_;
721 # remove trailing /
722 $filepath =~ s@/$@@;
723 if ($filepath !~ m@/@) {
724 return ($filepath, "");
725 }
726 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
727 return ($file, $dirs);
728
729}
730
731
[10281]732sub filename_within_collection
733{
734 my ($filename) = @_;
735
736 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
737
738 if (defined $collect_dir) {
[23362]739
[15875]740 # if from within GSDLCOLLECTDIR, then remove directory prefix
741 # so source_filename is realative to it. This is done to aid
742 # portability, i.e. the collection can be moved to somewhere
743 # else on the file system and the archives directory will still
744 # work. This is needed, for example in the applet version of
745 # GLI where GSDLHOME/collect on the server will be different to
746 # the collect directory of the remove user. Of course,
747 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
748 # it back into a full pathname.
[23362]749
750 $filename = filename_within_directory($filename,$collect_dir);
[10281]751 }
752
753 return $filename;
754}
755
[23362]756sub prettyprint_file
757{
[23484]758 my ($base_dir,$file,$gli) = @_;
[23362]759
[27303]760 my $filename_full_path = &FileUtils::filenameConcatenate($base_dir,$file);
[23362]761
[28375]762 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]763 require Win32;
764
765 # For some reason base_dir in the form c:/a/b/c
766 # This leads to confusion later on, so turn it back into
767 # the more usual Windows form
768 $base_dir =~ s/\//\\/g;
769 my $long_base_dir = Win32::GetLongPathName($base_dir);
770 my $long_full_path = Win32::GetLongPathName($filename_full_path);
771
772 $file = filename_within_directory($long_full_path,$long_base_dir);
[23484]773 $file = encode("utf8",$file) if ($gli);
[23362]774 }
775
776 return $file;
777}
778
779
780sub upgrade_if_dos_filename
781{
[23371]782 my ($filename_full_path,$and_encode) = @_;
[23362]783
[28375]784 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]785 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
786 # to its long (Windows) version
[23416]787 my $long_filename = Win32::GetLongPathName($filename_full_path);
788 if (defined $long_filename) {
[29810]789
[23416]790 $filename_full_path = $long_filename;
791 }
[23362]792 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
[23483]793 $filename_full_path =~ s/^(.):/\u$1:/;
[29810]794
[23371]795 if ((defined $and_encode) && ($and_encode)) {
796 $filename_full_path = encode("utf8",$filename_full_path);
797 }
[23362]798 }
799
800 return $filename_full_path;
801}
802
803
[23388]804sub downgrade_if_dos_filename
805{
806 my ($filename_full_path) = @_;
807
[28375]808 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23388]809 require Win32;
810
811 # Ensure the given long Windows filename is in a form that can
812 # be opened by Perl => convert it to a short DOS-like filename
813
[23414]814 my $short_filename = Win32::GetShortPathName($filename_full_path);
815 if (defined $short_filename) {
816 $filename_full_path = $short_filename;
817 }
[23416]818 # Make sure initial drive letter is lower-case (to fit in
819 # with rest of Greenstone)
[23483]820 $filename_full_path =~ s/^(.):/\u$1:/;
[23388]821 }
822
823 return $filename_full_path;
824}
825
[23561]826sub block_filename
827{
828 my ($block_hash,$filename) = @_;
829
[28375]830 if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) {
[23561]831
832 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
833 my $lower_filename = lc($filename);
834 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
835# my $lower_drive = $filename;
836# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
837
838# my $upper_drive = $filename;
839# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
840#
841# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
842# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
843 }
844 else {
845 $block_hash->{'file_blocks'}->{$filename} = 1;
846 }
847}
[23388]848
[23561]849
[18441]850sub filename_is_absolute
851{
[27303]852 warnings::warnif("deprecated", "util::filename_is_absolute() is deprecated, using FileUtils::isFilenameAbsolute() instead");
853 return &FileUtils::isFilenameAbsolute(@_);
[18441]854}
855
856
[17572]857## @method make_absolute()
858#
859# Ensure the given file path is absolute in respect to the given base path.
860#
861# @param $base_dir A string denoting the base path the given dir must be
862# absolute to.
863# @param $dir The directory to be made absolute as a string. Note that the
864# dir may already be absolute, in which case it will remain
865# unchanged.
866# @return The now absolute form of the directory as a string.
867#
868# @author John Thompson, DL Consulting Ltd.
869# @copy 2006 DL Consulting Ltd.
870#
871#used in buildcol.pl, doesn't work for all cases --kjdon
872sub make_absolute {
873
874 my ($base_dir, $dir) = @_;
[18441]875### print STDERR "dir = $dir\n";
[17572]876 $dir =~ s/[\\\/]+/\//g;
877 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
878 $dir =~ s|^/tmp_mnt||;
879 1 while($dir =~ s|/[^/]*/\.\./|/|g);
880 $dir =~ s|/[.][.]?/|/|g;
881 $dir =~ tr|/|/|s;
[18441]882### print STDERR "dir = $dir\n";
[17572]883
884 return $dir;
885}
886## make_absolute() ##
[10281]887
[7929]888sub get_dirsep {
889
[28375]890 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[7929]891 return "\\";
892 } else {
893 return "\/";
894 }
895}
896
[619]897sub get_os_dirsep {
[4]898
[28375]899 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[619]900 return "\\\\";
901 } else {
902 return "\\\/";
903 }
904}
905
906sub get_re_dirsep {
907
908 return "\\\\|\\\/";
909}
910
911
[15003]912sub get_dirsep_tail {
913 my ($filename) = @_;
914
915 # returns last part of directory or filename
916 # On unix e.g. a/b.d => b.d
917 # a/b/c => c
918
[15088]919 my $dirsep = get_re_dirsep();
920 my @dirs = split (/$dirsep/, $filename);
921 my $tail = pop @dirs;
[15003]922
[15088]923 # - caused problems under windows
924 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
925
[15003]926 return $tail;
927}
928
929
[4]930# if this is running on windows we want binaries to end in
931# .exe, otherwise they don't have to end in any extension
932sub get_os_exe {
[28375]933 return ".exe" if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin"));
[4]934 return "";
935}
936
937
[86]938# test to see whether this is a big or little endian machine
[15713]939sub is_little_endian
940{
941 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
942 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
943 # Otherwise, it's little endian
944
945 #return 0 if $^O =~ /^darwin$/i;
[17714]946 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
947
948 # Going back to stating exactly whether the machine is little endian
949 # or big endian, without any special case for Macs. Since for rata it comes
950 # back with little endian and for shuttle with bigendian.
[15713]951 return (ord(substr(pack("s",1), 0, 1)) == 1);
[86]952}
[4]953
[86]954
[135]955# will return the collection name if successful, "" otherwise
956sub use_collection {
[1454]957 my ($collection, $collectdir) = @_;
[135]958
[1454]959 if (!defined $collectdir || $collectdir eq "") {
[27303]960 $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect");
[1454]961 }
962
[28211]963 if (!defined $ENV{'GREENSTONEHOME'}) { # for GS3, would have been defined in use_site_collection, to GSDL3HOME
964 $ENV{'GREENSTONEHOME'} = $ENV{'GSDLHOME'};
965 }
966
[135]967 # get and check the collection
968 if (!defined($collection) || $collection eq "") {
969 if (defined $ENV{'GSDLCOLLECTION'}) {
970 $collection = $ENV{'GSDLCOLLECTION'};
971 } else {
[2359]972 print STDOUT "No collection specified\n";
[135]973 return "";
974 }
975 }
976
977 if ($collection eq "modelcol") {
[2359]978 print STDOUT "You can't use modelcol.\n";
[135]979 return "";
980 }
981
982 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
983 # are defined
[17204]984 $ENV{'GSDLCOLLECTION'} = $collection;
[28211]985 $ENV{'GSDLCOLLECTHOME'} = $collectdir;
[27303]986 $ENV{'GSDLCOLLECTDIR'} = &FileUtils::filenameConcatenate($collectdir, $collection);
[135]987
988 # make sure this collection exists
989 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
[2359]990 print STDOUT "Invalid collection ($collection).\n";
[135]991 return "";
992 }
993
994 # everything is ready to go
995 return $collection;
996}
997
[21207]998sub get_current_collection_name {
999 return $ENV{'GSDLCOLLECTION'};
1000}
[14926]1001
1002
1003# will return the collection name if successful, "" otherwise.
1004# Like use_collection (above) but for greenstone 3 (taking account of site level)
1005
1006sub use_site_collection {
1007 my ($site, $collection, $collectdir) = @_;
1008
1009 if (!defined $collectdir || $collectdir eq "") {
1010 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
[27303]1011 $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'}, "sites", $site, "collect");
[14926]1012 }
1013
[28211]1014 if (defined $ENV{'GSDL3HOME'}) {
1015 $ENV{'GREENSTONEHOME'} = $ENV{'GSDL3HOME'};
1016 $ENV{'SITEHOME'} = &FileUtils::filenameConcatenate($ENV{'GREENSTONEHOME'}, "sites", $site);
1017 } elsif (defined $ENV{'GSDL3SRCHOME'}) {
1018 $ENV{'GREENSTONEHOME'} = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "web");
1019 $ENV{'SITEHOME'} = &FileUtils::filenameConcatenate($ENV{'GREENSTONEHOME'}, "sites", $site);
1020 } else {
1021 print STDERR "*** util::use_site_collection(). Warning: Neither GSDL3HOME nor GSDL3SRCHOME set.\n";
1022 }
1023
[14926]1024 # collectdir explicitly set by this point (using $site variable if required).
1025 # Can call "old" gsdl2 use_collection now.
1026
1027 return use_collection($collection,$collectdir);
1028}
1029
1030
1031
[15018]1032sub locate_config_file
1033{
1034 my ($file) = @_;
1035
1036 my $locations = locate_config_files($file);
1037
1038 return shift @$locations; # returns undef if 'locations' is empty
1039}
1040
1041
1042sub locate_config_files
1043{
1044 my ($file) = @_;
1045
1046 my @locations = ();
1047
1048 if (-e $file) {
1049 # Clearly specified (most likely full filename)
1050 # No need to hunt in 'etc' directories, return value unchanged
1051 push(@locations,$file);
1052 }
1053 else {
1054 # Check for collection specific one before looking in global GSDL 'etc'
[16969]1055 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1056 my $test_collect_etc_filename
[27303]1057 = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"etc", $file);
[16969]1058
1059 if (-e $test_collect_etc_filename) {
1060 push(@locations,$test_collect_etc_filename);
1061 }
[15018]1062 }
1063 my $test_main_etc_filename
[27303]1064 = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"etc", $file);
[15018]1065 if (-e $test_main_etc_filename) {
1066 push(@locations,$test_main_etc_filename);
1067 }
1068 }
1069
1070 return \@locations;
1071}
1072
1073
[9955]1074sub hyperlink_text
1075{
1076 my ($text) = @_;
1077
1078 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1079 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1080
1081 return $text;
1082}
1083
1084
[16436]1085# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1086# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
[27303]1087sub is_dir_empty {
1088 warnings::warnif("deprecated", "util::is_dir_empty() is deprecated, using FileUtils::isDirectoryEmpty() instead");
1089 return &FileUtils::isDirectoryEmpty(@_);
[16436]1090}
1091
[18337]1092# Returns the given filename converted using either URL encoding or base64
1093# encoding, as specified by $rename_method. If the given filename has no suffix
[20413]1094# (if it is just the tailname), then $no_suffix should be some defined value.
1095# rename_method can be url, none, base64
[18319]1096sub rename_file {
[18337]1097 my ($filename, $rename_method, $no_suffix) = @_;
[18329]1098
[18337]1099 if(!$filename) { # undefined or empty string
[18329]1100 return $filename;
1101 }
[18319]1102
[20413]1103 if (!$rename_method) {
1104 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1105 # Debugging information
[22856]1106 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1107 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
[20413]1108 $rename_method = "url";
1109 } elsif($rename_method eq "none") {
1110 return $filename; # would have already been renamed
1111 }
1112
[19762]1113 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1114 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1115 ###$filename =~ s/ /_/g;
[18337]1116
1117 my ($tailname,$dirname,$suffix);
1118 if($no_suffix) { # given a tailname, no suffix
1119 ($tailname,$dirname) = File::Basename::fileparse($filename);
1120 }
1121 else {
1122 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1123 }
[23388]1124 if (!$suffix) {
1125 $suffix = "";
1126 }
[26973]1127 # This breaks GLI matching extracted metadata to files in Enrich panel, as
1128 # original is eg .JPG while gsdlsourcefilename ends up .jpg
1129 # Not sure why it was done in first place...
1130 #else {
1131 #$suffix = lc($suffix);
1132 #}
[18337]1133
[20413]1134 if ($rename_method eq "url") {
[18319]1135 $tailname = &unicode::url_encode($tailname);
1136 }
1137 elsif ($rename_method eq "base64") {
[18341]1138 $tailname = &unicode::base64_encode($tailname);
[18319]1139 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1140 }
[18326]1141
[18319]1142 $filename = "$tailname$suffix";
[18326]1143 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
[18319]1144
1145 return $filename;
1146}
1147
[21616]1148
1149# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
[21664]1150sub rename_ldb_or_bdb_file {
[18657]1151 my ($filename_no_ext) = @_;
1152
1153 my $new_filename = "$filename_no_ext.gdb";
[21615]1154 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
[18657]1155 # try ldb
1156 my $old_filename = "$filename_no_ext.ldb";
1157
1158 if (-f $old_filename) {
[19056]1159 print STDERR "Renaming $old_filename to $new_filename\n";
1160 rename ($old_filename, $new_filename)
1161 || print STDERR "Rename failed: $!\n";
[18657]1162 return;
1163 }
1164 # try bdb
1165 $old_filename = "$filename_no_ext.bdb";
1166 if (-f $old_filename) {
[19056]1167 print STDERR "Renaming $old_filename to $new_filename\n";
1168 rename ($old_filename, $new_filename)
1169 || print STDERR "Rename failed: $!\n";
[18657]1170 return;
1171 }
1172}
1173
[24874]1174sub os_dir() {
1175
1176 my $gsdlarch = "";
1177 if(defined $ENV{'GSDLARCH'}) {
1178 $gsdlarch = $ENV{'GSDLARCH'};
1179 }
1180 return $ENV{'GSDLOS'}.$gsdlarch;
1181}
[18657]1182
[28997]1183# returns 1 if this (GS server) is a GS3 installation, returns 0 if it's GS2.
1184sub is_gs3() {
1185 if($ENV{'GSDL3SRCHOME'}) {
1186 return 1;
1187 } else {
1188 return 0;
1189 }
1190}
1191
[21719]1192# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1193# By default, /greenstone3 for GS3 or /greenstone for GS2.
1194sub get_greenstone_url_prefix() {
1195 # if already set on a previous occasion, just return that
1196 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1197 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
[18657]1198
[21719]1199 my ($configfile, $urlprefix, $defaultUrlprefix);
1200 my @propertynames = ();
1201
1202 if($ENV{'GSDL3SRCHOME'}) {
1203 $defaultUrlprefix = "/greenstone3";
[27303]1204 $configfile = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
[21719]1205 push(@propertynames, qw/path\s*\=/);
1206 } else {
1207 $defaultUrlprefix = "/greenstone";
[27303]1208 $configfile = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
[21719]1209 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1210 }
1211
1212 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1213
1214 if(!$urlprefix) { # no values found for URL prefix, use default values
1215 $urlprefix = $defaultUrlprefix;
1216 } else {
1217 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1218 $urlprefix =~ s/^\///; # remove the starting slash
1219 my @dirs = split(/(\\|\/)/, $urlprefix);
1220 $urlprefix = shift(@dirs);
1221
1222 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1223 $urlprefix = "/$urlprefix";
1224 }
1225 }
1226
1227 # set for the future
1228 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1229# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1230 return $urlprefix;
1231}
1232
1233
[28460]1234
1235#
[30561]1236# The following comes from activate.pl
[28460]1237#
1238# Designed to work with a server included with GS.
1239# - For GS3, we ask ant for the library URL.
1240# - For GS2, we derive the URL from the llssite.cfg file.
1241
1242sub get_full_greenstone_url_prefix
1243{
1244 # if already set on a previous occasion, just return that
1245 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1246 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1247
1248 my $gs_mode = ($ENV{'GSDL3SRCHOME'}) ? "gs3" : "gs2";
1249
1250 my $url = undef;
1251
1252 if($gs_mode eq "gs2") {
1253 my $llssite_cfg = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "llssite.cfg");
1254
1255 if(-f $llssite_cfg) {
1256 # check llssite.cfg for line with url property
1257 # for server.exe also need to use portnumber and enterlib properties
1258
1259 # Read in the entire contents of the file in one hit
1260 if (!open (FIN, $llssite_cfg)) {
1261 print STDERR "util::get_library_URL() failed to open $llssite_cfg ($!)\n";
1262 return undef;
1263 }
1264
1265 my $contents;
1266 sysread(FIN, $contents, -s FIN);
1267 close(FIN);
1268
1269 my @lines = split(/[\n\r]+/, $contents); # split on carriage-returns and/or linefeeds
1270 my $enterlib = "";
[30561]1271 my $portnumber = "8282"; # will remain empty (implicit port 80) unless it's specifically been assigned
[28460]1272
1273 foreach my $line (@lines) {
1274 if($line =~ m/^url=(.*)$/) {
1275 $url = $1;
1276 } elsif($line =~ m/^enterlib=(.*)$/) {
1277 $enterlib = $1;
1278 } elsif($line =~ m/^portnumber=(.*)$/) {
1279 $portnumber = $1;
1280 }
1281 }
1282
1283 if(!$url) {
1284 return undef;
1285 }
1286 elsif($url eq "URL_pending") { # library is not running
1287 # do not process url=URL_pending in the file, since for server.exe
1288 # this just means the Enter Library button hasn't been pressed yet
1289 $url = undef;
1290 }
1291 else {
1292 # In the case of server.exe, need to do extra work to get the proper URL
1293 # But first, need to know whether we're indeed dealing with server.exe:
1294
1295 # compare the URL's domain to the full URL
1296 # E.g. for http://localhost:8383/greenstone3/cgi-bin, the domain is localhost:8383
1297 my $uri = URI->new( $url );
1298 my $host = $uri->host;
1299 #print STDERR "@@@@@ host: $host\n";
1300 if($url =~ m/http:\/\/$host(\/)?$/) {
1301 #if($url !~ m/http:\/\/$host:$portnumber(\/)?/ || $url =~ m/http:\/\/$host(\/)?$/) {
1302 # (if the URL does not contain the portnumber, OR if the port is implicitly 80 and)
1303 # If the domain with http:// prefix is completely the same as the URL, assume server.exe
1304 # then the actual URL is the result of suffixing the port and enterlib properties in llssite.cfg
1305 $url = $url.":".$portnumber.$enterlib;
1306 } # else, apache web server
1307
1308 }
1309 }
1310 } elsif($gs_mode eq "gs3") {
1311 # Either check build.properties for tomcat.server, tomcat.port and app.name (and default servlet name).
1312 # app.name is stored in app.path by build.xml. Need to move app.name in build.properties from build.xml
1313
1314 # Or, run the new target get-default-servlet-url
1315 # the output can look like:
1316 #
1317 # Buildfile: build.xml
1318 # [echo] os.name: Windows Vista
1319 #
1320 # get-default-servlet-url:
1321 # [echo] http://localhost:8383/greenstone3/library
1322 # BUILD SUCCESSFUL
1323 # Total time: 0 seconds
1324
1325 #my $output = qx/ant get-default-servlet-url/; # backtick operator, to get STDOUT (else 2>&1)
1326 # - see http://stackoverflow.com/questions/799968/whats-the-difference-between-perls-backticks-system-and-exec
1327
1328 # The get-default-servlet-url ant target can be run from anywhere by specifying the
1329 # location of GS3's ant build.xml buildfile. Activate.pl can be run from anywhere for GS3
1330 # GSDL3SRCHOME will be set for GS3 by gs3-setup.sh, a step that would have been necessary
1331 # to run the activate.pl script in the first place
1332
1333 my $full_build_xml = &FileUtils::javaFilenameConcatenate($ENV{'GSDL3SRCHOME'},"build.xml");
1334
1335 my $perl_command = "ant -buildfile \"$full_build_xml\" get-default-servlet-url";
1336
1337 if (open(PIN, "$perl_command |")) {
1338 while (defined (my $perl_output_line = <PIN>)) {
1339
1340 if($perl_output_line =~ m@http:\/\/(\S*)@) { # grab all the non-whitespace chars
1341 $url="http://".$1;
1342 }
1343 }
1344 close(PIN);
1345 } else {
1346 print STDERR "util::get_library_URL() failed to run $perl_command to work out library URL for $gs_mode\n";
1347 }
1348 }
1349
1350 # either the url is still undef or it is now set
1351 #print STDERR "\n@@@@@ final URL:|$url|\n" if $url;
1352 #print STDERR "\n@@@@@ URL still undef\n" if !$url;
1353
1354 $ENV{'FULL_GREENSTONE_URL_PREFIX'} = $url;
1355
1356 return $url;
1357}
1358
1359
[21719]1360# Given a config file (xml or java properties file) and a list/array of regular expressions
1361# that represent property names to match on, this function will return the value for the 1st
1362# matching property name. If the return value is undefined, no matching property was found.
1363sub extract_propvalue_from_file() {
1364 my ($configfile, $propertynames) = @_;
1365
1366 my $value;
1367 unless(open(FIN, "<$configfile")) {
1368 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1369 return $value; # not initialised
1370 }
1371
1372 # Read the entire file at once, as one single line, then close it
1373 my $filecontents;
1374 {
1375 local $/ = undef;
1376 $filecontents = <FIN>;
1377 }
1378 close(FIN);
1379
1380 foreach my $regex (@$propertynames) {
1381 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1382 if($value) {
1383 $value =~ s/^\"//; # remove any startquotes
1384 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1385 last; # found value for a matching property, break from loop
1386 }
1387 }
1388
1389 return $value;
1390}
1391
[23306]1392# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1393# given that perllib is in @INC in order to invoke this subroutine.
1394# Call as follows -- after setting up INC to include perllib and
1395# after setting up GSDLHOME and GSDLOS:
1396#
1397# require util;
1398# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1399#
1400sub setup_greenstone_env() {
1401 my ($GSDLHOME, $GSDLOS) = @_;
1402
1403 #my %env_map = ();
1404 # Get the localised ENV settings of running a localised source setup.bash
[23314]1405 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1406 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1407 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
[28375]1408 if (($GSDLOS =~ m/windows/i) && ($^O ne "cygwin")) {
[23314]1409 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1410 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
[23306]1411 }
1412 if (!open(PIN, "$perl_command |")) {
1413 print STDERR ("Unable to execute command: $perl_command. $!\n");
[24563]1414 }
[23306]1415
[29582]1416 my $lastkey;
[23306]1417 while (defined (my $perl_output_line = <PIN>)) {
1418 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
[29582]1419 if(defined $key) {
1420 #$env_map{$key}=$value;
1421 $ENV{$key}=$value;
1422 $lastkey = $key;
1423 } elsif($lastkey && $perl_output_line !~ m/^\s*$/) {
1424 # there was no equals sign in $perl_output_line, so this
1425 # $perl_output_line may be a spillover from the previous
1426 $ENV{$lastkey} = $ENV{$lastkey}."\n".$perl_output_line;
1427 }
[23306]1428 }
[24563]1429 close (PIN);
1430
[23306]1431 # If any keys in $ENV don't occur in Greenstone's localised env
1432 # (stored in $env_map), delete those entries from $ENV
1433 #foreach $key (keys %ENV) {
1434 # if(!defined $env_map{$key}) {
[29708]1435 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{$key}\n";
[23306]1436 # delete $ENV{$key}; # del $ENV(key, value) pair
1437 # }
1438 #}
1439 #undef %env_map;
1440}
1441
[24362]1442sub get_perl_exec() {
1443 my $perl_exec = $^X; # may return just "perl"
1444
1445 if($ENV{'PERLPATH'}) {
[27303]1446 # OR: # $perl_exec = &FileUtils::filenameConcatenate($ENV{'PERLPATH'},"perl");
[28375]1447 if (($ENV{'GSDLOS'} =~ m/windows/) && ($^O ne "cygwin")) {
[24362]1448 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1449 } else {
1450 $perl_exec = "$ENV{'PERLPATH'}/perl";
1451 }
1452 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1453 # containing the full path to the current perl executable we're using
1454 $perl_exec = $Config{perlpath}; # configured path for perl
1455 if (!-e $perl_exec) { # may not point to location on this machine
1456 $perl_exec = $^X; # may return just "perl"
1457 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1458 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1459 }
1460 }
1461 }
1462
1463 return $perl_exec;
1464}
1465
[25533]1466# returns the path to the java command in the JRE included with GS (if any),
1467# quoted to safeguard any spaces in this path, otherwise a simple java
1468# command is returned which assumes and will try for a system java.
[25512]1469sub get_java_command {
1470 my $java = "java";
1471 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1472 # after running setup.bat or from GLI which also runs setup.bat
[27303]1473 my $java_bin = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"packages","jre","bin");
[25512]1474 if(-d $java_bin) {
[27303]1475 $java = &FileUtils::filenameConcatenate($java_bin,"java");
[25533]1476 $java = "\"".$java."\""; # quoted to preserve spaces in path
[25512]1477 }
1478 }
1479 return $java;
1480}
[24362]1481
[25512]1482
[25577]1483# Given the qualified collection name (colgroup/collection),
1484# returns the collection and colgroup parts
1485sub get_collection_parts {
1486 # http://perldoc.perl.org/File/Basename.html
1487 # my($filename, $directories, $suffix) = fileparse($path);
1488 # "$directories contains everything up to and including the last directory separator in the $path
1489 # including the volume (if applicable). The remainder of the $path is the $filename."
1490 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);
1491
1492 my $qualified_collection = shift(@_);
1493
1494 # Since activate.pl can be launched from the command-line, including by a user,
1495 # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1496 # Also allow for the accidental inclusion of multiple slashes
1497 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1498
1499 if(!defined $collection) {
1500 $collection = $colgroup;
1501 $colgroup = "";
1502 }
1503 return ($collection, $colgroup);
1504}
1505
1506# work out the "collectdir/collection" location
1507sub resolve_collection_dir {
1508 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1509
[28211]1510 if (defined $ENV{'GSDLCOLLECTDIR'}) { # a predefined collection dir exists
1511 return $ENV{'GSDLCOLLECTDIR'};
1512 }
1513
[25577]1514 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);
1515
[28175]1516 if (!defined $collect_dir || !$collect_dir) { # if undefined or empty string
1517 $collect_dir = &util::get_working_collect_dir($site);
[25577]1518 }
[28175]1519
1520 return &FileUtils::filenameConcatenate($collect_dir,$colgroup,$collection);
1521}
1522
1523# work out the full path to "collect" of this greenstone 2/3 installation
1524sub get_working_collect_dir {
[28211]1525 my ($site) = @_;
1526
1527 if (defined $ENV{'GSDLCOLLECTHOME'}) { # a predefined collect dir exists
1528 return $ENV{'GSDLCOLLECTHOME'};
[28175]1529 }
[28211]1530
1531 if (defined $site && $site) { # site non-empty, so get default collect dir for GS3
1532
1533 if (defined $ENV{'GSDL3HOME'}) {
1534 return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site,"collect"); # web folder
1535 }
1536 elsif (defined $ENV{'GSDL3SRCHOME'}) {
1537 return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites",$site,"collect");
1538 }
1539 }
1540
[28213]1541 elsif (defined $ENV{'SITEHOME'}) {
[28211]1542 return &FileUtils::filenameConcatenate($ENV{'SITEHOME'},"collect");
[28177]1543 }
1544
1545 else { # get default collect dir for GS2
1546 return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"collect");
1547 }
1548}
1549
[28211]1550sub is_abs_path_any_os {
[28177]1551 my ($path) = @_;
1552
[28211]1553 # We can have filenames in our DBs that were produced on other OS, so this method exists
1554 # to help identify absolute paths in such cases.
1555
[28177]1556 return 1 if($path =~ m@^/@); # full paths begin with forward slash on linux/mac
1557 return 1 if($path =~ m@^([a-zA-Z]\:|\\)@); # full paths begin with drive letter colon for Win or \ for volume, http://stackoverflow.com/questions/13011013/get-only-volume-name-from-filepath
1558
1559 return 0;
1560}
1561
1562
1563# This subroutine is for improving portability of Greenstone collections from one OS to another,
1564# to be used to convert absolute paths going into db files into paths with placeholders instead.
1565# This sub works with util::get_common_gs_paths and takes a path to a greenstone file and, if it's
1566# an absolute path, then it will replace the longest matching greenstone-path prefix of the given
1567# path with a placeholder to match.
1568# The Greenstone-path prefixes that can be matched are the following common Greenstone paths:
1569# the path to the current (specific) collection, the path to the general GS collect directory,
1570# the path to the site directory if GS3, else the path to the GSDLHOME/GSDL3HOME folder.
1571# The longest matching prefix will be replaced with the equivalent placeholder:
1572# @THISCOLLECTPATH@, else @COLLECTHOME@, else @SITEHOME@, else @GSDLHOME@.
1573sub abspath_to_placeholders {
1574 my $path = shift(@_); # path to convert from absolute to one with placeholders
[28236]1575 my $opt_long_or_short_winfilenames = shift(@_) || "short"; # whether we want to force use of long file names even on windows, default uses short
[28177]1576
[28211]1577 return $path unless is_abs_path_any_os($path); # path is relative
[28236]1578
1579 if ($opt_long_or_short_winfilenames eq "long") {
1580 $path = &util::upgrade_if_dos_filename($path); # will only do something on windows
1581 }
[28225]1582
[28177]1583 # now we know we're dealing with absolute paths and have to replace gs prefixes with placeholders
[28213]1584 my @gs_paths = ($ENV{'GSDLCOLLECTDIR'}, $ENV{'GSDLCOLLECTHOME'}, $ENV{'SITEHOME'}, $ENV{'GREENSTONEHOME'}); # list in this order: from longest to shortest path
[28177]1585
[28211]1586 my %placeholder_map = ($ENV{'GREENSTONEHOME'} => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1587 $ENV{'GSDLCOLLECTHOME'} => '@COLLECTHOME@',
1588 $ENV{'GSDLCOLLECTDIR'} => '@THISCOLLECTPATH@'
[28177]1589 );
[28213]1590 $placeholder_map{$ENV{'SITEHOME'}} = '@SITEHOME@' if defined $ENV{'SITEHOME'};
[28177]1591
[28228]1592 $path = &util::_abspath_to_placeholders($path, \@gs_paths, \%placeholder_map);
[28177]1593
[28236]1594 if ($ENV{'GSDLOS'} =~ /^windows$/i && $opt_long_or_short_winfilenames eq "short") {
[28228]1595 # for windows need to look for matches on short file names too
1596 # matched paths are again to be replaced with the usual placeholders
1597
1598 my $gsdlcollectdir = &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTDIR'});
1599 my $gsdlcollecthome = &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTHOME'});
1600 my $sitehome = (defined $ENV{'SITEHOME'}) ? &util::downgrade_if_dos_filename($ENV{'SITEHOME'}) : undef;
1601 my $greenstonehome = &util::downgrade_if_dos_filename($ENV{'GREENSTONEHOME'});
1602
1603 @gs_paths = ($gsdlcollectdir, $gsdlcollecthome, $sitehome, $greenstonehome); # order matters
1604
1605 %placeholder_map = ($greenstonehome => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1606 $gsdlcollecthome => '@COLLECTHOME@',
1607 $gsdlcollectdir => '@THISCOLLECTPATH@'
1608 );
1609 $placeholder_map{$sitehome} = '@SITEHOME@' if defined $sitehome;
1610
1611 $path = &util::_abspath_to_placeholders($path, \@gs_paths, \%placeholder_map);
1612 }
1613
1614 return $path;
1615}
1616
1617sub _abspath_to_placeholders {
1618 my ($path, $gs_paths_ref, $placeholder_map_ref) = @_;
1619
[28177]1620 # The sequence of elements in @gs_paths matters
1621 # Need to loop starting from the *longest* matching path (the path to the specific collection)
1622 # to the shortest matching path (the path to gsdlhome/gsdl3home folder):
1623
[28228]1624 foreach my $gs_path (@$gs_paths_ref) {
[28213]1625 next if(!defined $gs_path); # site undefined for GS2
[28211]1626
[28177]1627 my $re_path = &util::filename_to_regex($gs_path); # escape for regex
1628
[28211]1629 if($path =~ m/^$re_path/i) { # case sensitive or not for OS?
[28177]1630
[28228]1631 my $placeholder = $placeholder_map_ref->{$gs_path}; # get the placeholder to replace the matched path with
[28177]1632
1633 $path =~ s/^$re_path/$placeholder/; #case sensitive or not?
1634 #$path =~ s/^[\\\/]//; # remove gs_path's trailing separator left behind at the start of the path
[28604]1635 # lowercase file extension, This is needed when shortfilenames are used, as case affects alphetical ordering, which affects diffcol
1636 $path =~ s/\.([A-Z]+)$/".".lc($1)/e;
[28177]1637 last; # done
1638 }
1639 }
1640
1641 return $path;
1642}
1643
1644# Function that does the reverse of the util::abspath_to_placeholders subroutine
1645# Once again, call this with the values returned from util::get_common_gs_paths
1646sub placeholders_to_abspath {
1647 my $path = shift(@_); # path that can contain placeholders to convert to resolved absolute path
[28236]1648 my $opt_long_or_short_winfilenames = shift(@_) || "short"; # whether we want to force use of long file names even on windows, default uses short
[28177]1649
1650 return $path if($path !~ m/@/); # path contains no placeholders
1651
[28213]1652 # replace placeholders with gs prefixes
[28177]1653 my @placeholders = ('@THISCOLLECTPATH@', '@COLLECTHOME@', '@SITEHOME@', '@GSDLHOME@'); # order of paths not crucial in this case,
1654 # but listed here from longest to shortest once placeholders are have been resolved
1655
[28213]1656 # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
[28228]1657 my %placeholder_to_gspath_map;
[28236]1658 if ($ENV{'GSDLOS'} =~ /^windows$/i && $opt_long_or_short_winfilenames eq "short") {
[28228]1659 # always replace placeholders with short file names of the absolute paths on windows?
1660 %placeholder_to_gspath_map = ('@GSDLHOME@' => &util::downgrade_if_dos_filename($ENV{'GREENSTONEHOME'}),
1661 '@COLLECTHOME@' => &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTHOME'}),
1662 '@THISCOLLECTPATH@' => &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTDIR'})
1663 );
1664 $placeholder_to_gspath_map{'@SITEHOME@'} = &util::downgrade_if_dos_filename($ENV{'SITEHOME'}) if defined $ENV{'SITEHOME'};
1665 } else {
1666 %placeholder_to_gspath_map = ('@GSDLHOME@' => $ENV{'GREENSTONEHOME'},
1667 '@SITEHOME@' => $ENV{'SITEHOME'}, # can be undef
1668 '@COLLECTHOME@' => $ENV{'GSDLCOLLECTHOME'},
1669 '@THISCOLLECTPATH@' => $ENV{'GSDLCOLLECTDIR'}
1670 ); # $placeholder_to_gspath_map{'@SITEHOME@'} = $ENV{'SITEHOME'} if defined $ENV{'SITEHOME'};
1671 }
[28177]1672
1673 foreach my $placeholder (@placeholders) {
1674 my $gs_path = $placeholder_to_gspath_map{$placeholder};
1675
[28213]1676 next if(!defined $gs_path); # sitehome for GS2 is undefined
[28177]1677
1678 if($path =~ m/^$placeholder/) {
1679 $path =~ s/^$placeholder/$gs_path/;
1680 last; # done
1681 }
1682 }
1683
1684 return $path;
1685}
1686
[25994]1687# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
1688# a directory containing sequentially numbered images.
1689sub create_itemfile
1690{
1691 my ($output_dir, $convert_basename, $convert_to) = @_;
[27303]1692 my $page_num = "";
[25994]1693
[27303]1694 opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";
[25994]1695 my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
[27303]1696 closedir DIR;
[25994]1697
[27303]1698 # Sort files in the directory by page_num
[25994]1699 sub page_number {
1700 my ($dir) = @_;
[27970]1701 my ($pagenum) =($dir =~ m/^.*?[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
1702# my ($pagenum) =($dir =~ m/(\d+)(\.(jpg|gif|png))?$/i); # this works but is not as safe/strict about input filepatterns as the above
[25994]1703
1704 $pagenum = 1 unless defined $pagenum;
1705 return $pagenum;
1706 }
1707
[27303]1708 # sort the files in the directory in the order of page_num rather than lexically.
[25994]1709 @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
1710
1711 # work out if the numbering of the now sorted image files starts at 0 or not
1712 # by checking the number of the first _image_ file (skipping item files)
1713 my $starts_at_0 = 0;
1714 my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
1715 if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
1716 $starts_at_0 = 1;
1717 }
1718
[27303]1719 my $item_file = &FileUtils::filenameConcatenate($output_dir, $convert_basename.".item");
1720 my $item_fh;
1721 &FileUtils::openFileHandle($item_file, 'w', \$item_fh);
1722 print $item_fh "<PagedDocument>\n";
[25994]1723
1724 foreach my $file (@dir_files){
1725 if ($file !~ /\.item/i){
1726 $page_num = page_number($file);
1727 $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
[27303]1728 print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
1729 }
[25994]1730 }
1731
[27303]1732 print $item_fh "</PagedDocument>\n";
1733 &FileUtils::closeFileHandle($item_file, \$item_fh);
[25994]1734 return $item_file;
1735}
1736
[28560]1737# Sets the gnomelib_env. Based on the logic in wvware.pl which can perhaps be replaced with a call to this function in future
1738sub set_gnomelib_env
1739{
1740 ## SET THE ENVIRONMENT AS DONE IN SETUP.BASH/BAT OF GNOME-LIB
1741 # Though this is only needed for darwin Lion at this point (and android, though that is untested)
[27303]1742
[28560]1743 my $libext = "so";
1744 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1745 return;
1746 } elsif ($ENV{'GSDLOS'} =~ m/^darwin$/i) {
1747 $libext = "dylib";
1748 }
1749
1750 if (!defined $ENV{'GEXTGNOME'}) {
1751 ##print STDERR "@@@ Setting GEXTGNOME env\n";
1752
1753 my $gnome_dir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"ext","gnome-lib-minimal");
1754
1755 if(! -d $gnome_dir) {
1756 $gnome_dir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"ext","gnome-lib");
1757
1758 if(! -d $gnome_dir) {
1759 $gnome_dir = "";
1760 }
1761 }
1762
1763 # now set other the related env vars,
1764 # IF we've found the gnome-lib dir installed in the ext folder
1765
1766 if ($gnome_dir ne "" && -f &FileUtils::filenameConcatenate($gnome_dir, $ENV{'GSDLOS'}, "lib", "libiconv.$libext")) {
1767 $ENV{'GEXTGNOME'} = $gnome_dir;
1768 $ENV{'GEXTGNOME_INSTALLED'}=&FileUtils::filenameConcatenate($ENV{'GEXTGNOME'}, $ENV{'GSDLOS'});
1769
1770 my $gnomelib_bin = &FileUtils::filenameConcatenate($ENV{'GEXTGNOME_INSTALLED'}, "bin");
1771 if(-d $gnomelib_bin) { # no bin subfolder in GS binary's cutdown gnome-lib-minimal folder
1772 &util::envvar_prepend("PATH", $gnomelib_bin);
1773 }
1774
1775 # util's prepend will create LD/DYLD_LIB_PATH if it doesn't exist yet
1776 my $gextlib = &FileUtils::filenameConcatenate($ENV{'GEXTGNOME_INSTALLED'}, "lib");
1777
1778 if($ENV{'GSDLOS'} eq "linux") {
1779 &util::envvar_prepend("LD_LIBRARY_PATH", $gextlib);
1780 }
1781 elsif ($ENV{'GSDLOS'} eq "darwin") {
1782 #&util::envvar_prepend("DYLD_LIBRARY_PATH", $gextlib);
1783 &util::envvar_prepend("DYLD_FALLBACK_LIBRARY_PATH", $gextlib);
1784 }
1785 }
1786
1787 # Above largely mimics the setup.bash of the gnome-lib-minimal.
1788 # Not doing the devel-srcpack that gnome-lib-minimal's setup.bash used to set
1789 # Not exporting GSDLEXTS variable either
1790 }
1791
1792# print STDERR "@@@@@ GEXTGNOME: ".$ENV{'GEXTGNOME'}."\n\tINSTALL".$ENV{'GEXTGNOME_INSTALLED'}."\n";
1793# print STDERR "\tPATH".$ENV{'PATH'}."\n";
1794# print STDERR "\tLD_LIB_PATH".$ENV{'LD_LIBRARY_PATH'}."\n" if $ENV{'LD_LIBRARY_PATH};
1795# print STDERR "\tDYLD_FALLBACK_LIB_PATH".$ENV{'DYLD_FALLBACK_LIBRARY_PATH'}."\n" if $ENV{'DYLD_FALLBACK_LIBRARY_PATH};
1796
1797 # if no GEXTGNOME, maybe users didn't need gnome-lib to run gnomelib/libiconv dependent binaries like hashfile, suffix, wget
1798 # (wvware is launched in a gnomelib env from its own script, but could possibly go through this script in future)
1799}
1800
1801
1802
[27374]1803## @function augmentINC()
1804#
1805# Prepend a path (if it exists) onto INC but only if it isn't already in INC
1806# @param $new_path The path to add
1807# @author jmt12
1808#
[27303]1809sub augmentINC
1810{
1811 my ($new_path) = @_;
1812 my $did_add_path = 0;
[27374]1813 # might need to be replaced with FileUtils::directoryExists() call eventually
[27303]1814 if (-d $new_path)
1815 {
1816 my $did_find_path = 0;
1817 foreach my $existing_path (@INC)
1818 {
1819 if ($existing_path eq $new_path)
1820 {
1821 $did_find_path = 1;
1822 last;
1823 }
1824 }
1825 if (!$did_find_path)
1826 {
1827 unshift(@INC, $new_path);
1828 $did_add_path = 1;
1829 }
1830 }
1831 return $did_add_path;
1832}
[27374]1833## augmentINC()
[27303]1834
[27374]1835
[4]18361;
Note: See TracBrowser for help on using the repository browser.