source: main/trunk/greenstone2/perllib/util.pm@ 28375

Last change on this file since 28375 was 28375, checked in by davidb, 11 years ago

A set of changes to help Greenstone building code (perl) run under Cygwin. The test is designed to be mutually to when run natively on Windows. In effect the refined test is saying: if you're windows but not cygwin then do as you used to do for Windows, otherwise go with Unix (as Cygwin is effectively giving you a Unix like operating system to run in)

  • Property svn:keywords set to Author Date Id Revision
File size: 50.1 KB
RevLine 
[537]1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package util;
27
[23362]28use strict;
[27509]29use FileUtils;
[23362]30
31use Encode;
[4]32use File::Copy;
[619]33use File::Basename;
[24362]34# Config for getting the perlpath in the recommended way, though it uses paths that are
35# hard-coded into the Config file that's generated upon configuring and compiling perl.
36# $^X works better in some cases to return the path to perl used to launch the script,
37# but if launched with plain "perl" (no full-path), that will be just what it returns.
[27303]38use Config;
39# New module for file related utility functions - intended as a
40# placeholder for an extension that allows a variety of different
41# filesystems (FTP, HTTP, SAMBA, WEBDav, HDFS etc)
42use FileUtils;
[4]43
[28226]44if ($ENV{'GSDLOS'} =~ /^windows$/i) {
[28225]45 require Win32; # for working out Windows Long Filenames from Win 8.3 short filenames
46}
47
[4]48# removes files (but not directories)
49sub rm {
[27303]50 warnings::warnif("deprecated", "util::rm() is deprecated, using FileUtils::removeFiles() instead");
51 return &FileUtils::removeFiles(@_);
[4]52}
53
54# recursive removal
[10211]55sub filtered_rm_r {
[27303]56 warnings::warnif("deprecated", "util::filtered_rm_r() is deprecated, using FileUtils::removeFilesFiltered() instead");
57 return &FileUtils::removeFilesFiltered(@_);
[4]58}
59
[10211]60# recursive removal
61sub rm_r {
[27326]62 warnings::warnif("deprecated", "util::rm_r() is deprecated, using FileUtils::removeFilesRecursive() instead");
[27303]63 return &FileUtils::removeFilesRecursive(@_);
[10211]64}
65
[721]66# moves a file or a group of files
67sub mv {
[27303]68 warnings::warnif("deprecated", "util::mv() is deprecated, using FileUtils::moveFiles() instead");
69 return &FileUtils::moveFiles(@_);
[721]70}
71
[25554]72# Move the contents of source directory into target directory
73# (as opposed to merely replacing target dir with the src dir)
74# This can overwrite any files with duplicate names in the target
75# but other files and folders in the target will continue to exist
76sub mv_dir_contents {
[27303]77 warnings::warnif("deprecated", "util::mv_dir_contents() is deprecated, using FileUtils::moveDirectoryContents() instead");
78 return &FileUtils::moveDirectoryContents(@_);
[25554]79}
[721]80
[4]81# copies a file or a group of files
82sub cp {
[27303]83 warnings::warnif("deprecated", "util::cp() is deprecated, using FileUtils::copyFiles() instead");
84 return &FileUtils::copyFiles(@_);
[4]85}
86
87# recursively copies a file or group of files
[1454]88# syntax: cp_r (sourcefiles, destination directory)
89# destination must be a directory - to copy one file to
90# another use cp instead
[4]91sub cp_r {
[27303]92 warnings::warnif("deprecated", "util::cp_r() is deprecated, using FileUtils::copyFilesrecursive() instead");
93 return &FileUtils::copyFilesRecursive(@_);
94}
[4]95
[21762]96# recursively copies a file or group of files
97# syntax: cp_r (sourcefiles, destination directory)
98# destination must be a directory - to copy one file to
99# another use cp instead
100sub cp_r_nosvn {
[27303]101 warnings::warnif("deprecated", "util::cp_r_nosvn() is deprecated, using FileUtils::copyFilesRecursiveNoSVN() instead");
102 return &FileUtils::copyFilesRecursiveNoSVN(@_);
[21762]103}
104
[11179]105# copies a directory and its contents, excluding subdirectories, into a new directory
106sub cp_r_toplevel {
[27303]107 warnings::warnif("deprecated", "util::cp_r_toplevel() is deprecated, using FileUtils::recursiveCopyTopLevel() instead");
108 return &FileUtils::recursiveCopyTopLevel(@_);
[11179]109}
110
[721]111sub mk_dir {
[27303]112 warnings::warnif("deprecated", "util::mk_dir() is deprecated, using FileUtils::makeDirectory() instead");
113 return &FileUtils::makeDirectory(@_);
[721]114}
115
[1046]116# in case anyone cares - I did some testing (using perls Benchmark module)
117# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
118# slightly faster (surprisingly) - Stefan.
[4]119sub mk_all_dir {
[27303]120 warnings::warnif("deprecated", "util::mk_all_dir() is deprecated, using FileUtils::makeAllDirectories() instead");
121 return &FileUtils::makeAllDirectories(@_);
[4]122}
123
[619]124# make hard link to file if supported by OS, otherwise copy the file
125sub hard_link {
[27303]126 warnings::warnif("deprecated", "util::hard_link() is deprecated, using FileUtils::hardLink() instead");
127 return &FileUtils::hardLink(@_);
[619]128}
129
[2193]130# make soft link to file if supported by OS, otherwise copy file
[721]131sub soft_link {
[27303]132 warnings::warnif("deprecated", "util::soft_link() is deprecated, using FileUtils::softLink() instead");
133 return &FileUtils::softLink(@_);
[721]134}
135
[23362]136# Primarily for filenames generated by processing
137# content of HTML files (which are mapped to UTF-8 internally)
138#
139# To turn this into an octet string that really exists on the file
140# system:
141# 1. don't need to do anything special for Unix-based systems
142# (as underlying file system is byte-code)
143# 2. need to map to short DOS filenames for Windows
[721]144
[23362]145sub utf8_to_real_filename
146{
147 my ($utf8_filename) = @_;
[721]148
[23362]149 my $real_filename;
[721]150
[28375]151 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]152 require Win32;
[23388]153
154 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
155
[23362]156 my $unicode_filename = decode("utf8",$utf8_filename);
157 $real_filename = Win32::GetShortPathName($unicode_filename);
158 }
159 else {
160 $real_filename = $utf8_filename;
161 }
162
163 return $real_filename;
164}
165
[27303]166sub fd_exists {
167 warnings::warnif("deprecated", "util::fd_exists() is deprecated, using FileUtils::fileTest() instead");
168 return &FileUtils::fileTest(@_);
[23362]169}
170
[27303]171sub file_exists {
172 warnings::warnif("deprecated", "util::file_exists() is deprecated, using FileUtils::fileExists() instead");
173 return &FileUtils::fileExists(@_);
[23362]174}
175
[27303]176sub dir_exists {
177 warnings::warnif("deprecated", "util::dir_exists() is deprecated, using FileUtils::directoryExists() instead");
178 return &FileUtils::directoryExists(@_);
[23362]179}
180
[4]181# updates a copy of a directory in some other part of the filesystem
182# verbosity settings are: 0=low, 1=normal, 2=high
183# both $fromdir and $todir should be absolute paths
184sub cachedir {
[27303]185 warnings::warnif("deprecated", "util::cachedir() is deprecated, using FileUtils::synchronizeDirectories() instead");
186 return &FileUtils::synchronizeDirectories(@_);
[4]187}
188
189# this function returns -1 if either file is not found
190# assumes that $file1 and $file2 are absolute file names or
191# in the current directory
192# $file2 is allowed to be newer than $file1
193sub differentfiles {
[27303]194 warnings::warnif("deprecated", "util::differentfiles() is deprecated, using FileUtils::differentFiles() instead");
195 return &FileUtils::differentFiles(@_);
[4]196}
197
198
[16266]199sub get_tmp_filename
200{
201 my $file_ext = shift(@_) || undef;
202
[22438]203 my $opt_dot_file_ext = "";
204 if (defined $file_ext) {
205 if ($file_ext !~ m/\./) {
206 # no dot, so needs one added in at start
207 $opt_dot_file_ext = ".$file_ext"
208 }
209 else {
210 # allow for "extensions" such as _metadata.txt to be handled
211 # gracefully
212 $opt_dot_file_ext = $file_ext;
213 }
214 }
[16266]215
[27303]216 my $tmpdir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "tmp");
[27819]217 &FileUtils::makeAllDirectories ($tmpdir) unless -e $tmpdir;
[4]218
219 my $count = 1000;
220 my $rand = int(rand $count);
[27303]221 my $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
[16266]222
223 while (-e $full_tmp_filename) {
[4]224 $rand = int(rand $count);
[27303]225 $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
[4]226 $count++;
227 }
[16266]228
229 return $full_tmp_filename;
[4]230}
231
[28066]232# These 2 are "static" variables used by the get_timestamped_tmp_folder() subroutine below and
233# belong with that function. They help ensure the timestamped tmp folders generated are unique.
234my $previous_timestamp = undef;
235my $previous_timestamp_f = 0; # frequency
236
[22886]237sub get_timestamped_tmp_folder
[22873]238{
[28066]239
[22886]240 my $tmp_dirname;
[22873]241 if(defined $ENV{'GSDLCOLLECTDIR'}) {
242 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
243 } elsif(defined $ENV{'GSDLHOME'}) {
244 $tmp_dirname = $ENV{'GSDLHOME'};
[22886]245 } else {
246 return undef;
[22873]247 }
248
[27303]249 $tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, "tmp");
[27509]250 &FileUtils::makeDirectory($tmp_dirname) if (!-e $tmp_dirname);
[22873]251
252 # add the timestamp into the path otherwise we can run into problems
253 # if documents have the same name
[28066]254 my $timestamp = time;
255
256 if (!defined $previous_timestamp || ($timestamp > $previous_timestamp)) {
257 $previous_timestamp_f = 0;
258 $previous_timestamp = $timestamp;
259 } else {
260 $previous_timestamp_f++;
261 }
262
[27303]263 my $time_tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, $timestamp);
[28066]264 $tmp_dirname = $time_tmp_dirname;
265 my $i = $previous_timestamp_f;
266
267 if($previous_timestamp_f > 0) {
268 $tmp_dirname = $time_tmp_dirname."_".$i;
269 $i++;
270 }
[22873]271 while (-e $tmp_dirname) {
[28066]272 $tmp_dirname = $time_tmp_dirname."_".$i;
[22873]273 $i++;
274 }
[28066]275 &FileUtils::makeDirectory($tmp_dirname);
276
[22886]277 return $tmp_dirname;
278}
[22873]279
[22886]280sub get_timestamped_tmp_filename_in_collection
281{
282
283 my ($input_filename, $output_ext) = @_;
284 # derive tmp filename from input filename
285 my ($tailname, $dirname, $suffix)
286 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
287
288 # softlink to collection tmp dir
289 my $tmp_dirname = &util::get_timestamped_tmp_folder();
290 $tmp_dirname = $dirname unless defined $tmp_dirname;
291
[22873]292 # following two steps copied from ConvertBinaryFile
[22886]293 # do we need them?? can't use them as is, as they use plugin methods.
294
[22873]295 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
296
297 # URLEncode this since htmls with images where the html filename is utf8 don't seem
298 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
299 # files on the filesystem.
300 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
301 if (defined $output_ext) {
302 $output_ext = ".$output_ext"; # add the dot
303 } else {
304 $output_ext = $suffix;
305 }
306 $output_ext= lc($output_ext);
[27303]307 my $tmp_filename = &FileUtils::filenameConcatenate($tmp_dirname, "$tailname$output_ext");
[22873]308
309 return $tmp_filename;
310}
311
[21218]312sub get_toplevel_tmp_dir
313{
[27303]314 return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "tmp");
[21218]315}
316
317
[17512]318sub filename_to_regex {
319 my $filename = shift (@_);
[4]320
[24971]321 # need to make single backslashes double so that regex works
[24832]322 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
[24829]323
[24832]324 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
325 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
[24829]326 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
327 $filename =~ s@\(@\\(@g; # escape brackets
328 $filename =~ s@\)@\\)@g; # escape brackets
[24932]329 $filename =~ s@\[@\\[@g; # escape brackets
330 $filename =~ s@\]@\\]@g; # escape brackets
[24829]331
[17512]332 return $filename;
333}
334
[24829]335sub unregex_filename {
336 my $filename = shift (@_);
337
338 # need to put doubled backslashes for regex back to single
339 $filename =~ s/\\\./\./g; # remove RE syntax for .
340 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
341 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
[24932]342 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
343 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
[24940]344
345 # \\ goes to \
346 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
347 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
[24829]348 return $filename;
349}
350
[4]351sub filename_cat {
[27303]352 # I've disabled this warning for now, as every Greenstone perl
353 # script seems to make use of this function and so you drown in a
354 # sea of deprecated warnings [jmt12]
355# warnings::warnif("deprecated", "util::filename_cat() is deprecated, using FileUtils::filenameConcatenate() instead");
356 return &FileUtils::filenameConcatenate(@_);
[4]357}
358
[21413]359
360sub pathname_cat {
361 my $first_path = shift(@_);
362 my (@pathnames) = @_;
363
364 # If first_path is not null or empty, then add it back into the list
365 if (defined $first_path && $first_path =~ /\S/) {
366 unshift(@pathnames, $first_path);
367 }
368
[21425]369 my $join_char;
[28375]370 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[21413]371 $join_char = ";";
372 } else {
373 $join_char = ":";
374 }
375
376 my $pathname = join($join_char, @pathnames);
377
378 # remove duplicate slashes
[28375]379 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[21413]380 $pathname =~ s/[\\\/]+/\\/g;
381 } else {
382 $pathname =~ s/[\/]+/\//g;
383 # DB: want a pathname abc\de.html to remain like this
384 }
385
386 return $pathname;
387}
388
389
[19616]390sub tidy_up_oid {
391 my ($OID) = @_;
392 if ($OID =~ /\./) {
393 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
394 $OID =~ s/\.//g; #remove any periods
395 }
396 if ($OID =~ /^\s.*\s$/) {
397 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
398 # remove starting and trailing whitespace
399 $OID =~ s/^\s+//;
400 $OID =~ s/\s+$//;
401 }
402 if ($OID =~ /^[\d]*$/) {
403 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
404 $OID = "D" . $OID;
405 }
406
407 return $OID;
408}
[26206]409
[10212]410sub envvar_prepend {
411 my ($var,$val) = @_;
412
[26206]413 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
[28375]414## my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
[26206]415
[28375]416 # Rewritten above to make ":" the default (Windows is the special
417 # case, anything else 'unusual' such as Solaris etc is Unix)
418 my $pathsep = (defined $ENV{'GSDLOS'} && (($ENV{'GSDLOS'} =~ m/windows/) && ($^O ne "cygwin"))) ? ";" : ":";
419
[16404]420 # do not prepend any value/path that's already in the environment variable
[24832]421
422 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
423 if (!defined($ENV{$var})) {
424 $ENV{$var} = "$val";
[16442]425 }
[24832]426 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]427 $ENV{$var} = "$val".$pathsep.$ENV{$var};
[10212]428 }
429}
430
431sub envvar_append {
432 my ($var,$val) = @_;
[26206]433
434 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
435 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
[24832]436
[16404]437 # do not append any value/path that's already in the environment variable
[26206]438
[24832]439 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
440 if (!defined($ENV{$var})) {
441 $ENV{$var} = "$val";
[16442]442 }
[24832]443 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]444 $ENV{$var} = $ENV{$var}.$pathsep."$val";
[24832]445 }
[10212]446}
447
[16442]448
[16380]449# splits a filename into a prefix and a tail extension using the tail_re, or
450# if that fails, splits on the file_extension . (dot)
451sub get_prefix_and_tail_by_regex {
[10212]452
[16380]453 my ($filename,$tail_re) = @_;
454
455 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
456 if ((!defined $file_prefix) || (!defined $file_ext)) {
457 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
458 }
459
460 return ($file_prefix,$file_ext);
461}
462
463# get full path and file only path from a base_dir (which may be empty) and
464# file (which may contain directories)
465sub get_full_filenames {
466 my ($base_dir, $file) = @_;
467
[28375]468# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
469# my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
470# print STDERR "** Calling method: $lcfilename:$cline $cpackage->$csubr\n";
471
472
[16380]473 my $filename_full_path = $file;
474 # add on directory if present
[27303]475 $filename_full_path = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\S/;
[16380]476
477 my $filename_no_path = $file;
478
479 # remove directory if present
480 $filename_no_path =~ s/^.*[\/\\]//;
481 return ($filename_full_path, $filename_no_path);
482}
483
[8682]484# returns the path of a file without the filename -- ie. the directory the file is in
485sub filename_head {
486 my $filename = shift(@_);
487
[28375]488 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[8682]489 $filename =~ s/[^\\\\]*$//;
490 }
491 else {
492 $filename =~ s/[^\\\/]*$//;
493 }
494
495 return $filename;
496}
497
498
[23362]499
[1454]500# returns 1 if filename1 and filename2 point to the same
501# file or directory
502sub filenames_equal {
503 my ($filename1, $filename2) = @_;
504
505 # use filename_cat to clean up trailing slashes and
506 # multiple slashes
[27303]507 $filename1 = &FileUtils::filenameConcatenate($filename1);
508 $filename2 = &FileUtils::filenameConcatenate($filename2);
[1454]509
510 # filenames not case sensitive on windows
511 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
512 $filename1 =~ tr/[A-Z]/[a-z]/;
513 $filename2 =~ tr/[A-Z]/[a-z]/;
514 }
515 return 1 if $filename1 eq $filename2;
516 return 0;
517}
518
[24932]519# If filename is relative to within_dir, returns the relative path of filename to that directory
520# with slashes in the filename returned as they were in the original (absolute) filename.
[23362]521sub filename_within_directory
522{
523 my ($filename,$within_dir) = @_;
524
[23371]525 if ($within_dir !~ m/[\/\\]$/) {
526 my $dirsep = &util::get_dirsep();
[23362]527 $within_dir .= $dirsep;
528 }
529
[24829]530 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
[23362]531 if ($filename =~ m/^$within_dir(.*)$/) {
532 $filename = $1;
533 }
534
535 return $filename;
536}
537
[24932]538# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
539# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
540# The subpath returned will also be a URL type filename.
541sub filename_within_directory_url_format
542{
543 my ($filename,$within_dir) = @_;
544
545 # convert parameters only to / slashes if Windows
546
[24971]547 my $filename_urlformat = &filepath_to_url_format($filename);
548 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
549
[24932]550 #if ($within_dir_urlformat !~ m/\/$/) {
551 # make sure directory ends with a slash
552 #$within_dir_urlformat .= "/";
553 #}
554
555 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
556
557 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
558
559 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
560 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
561 $filename_urlformat = $1;
562 }
563
564 return $filename_urlformat;
565}
566
[24971]567# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
568# since on Linux it doesn't represent a file separator but an escape char).
569sub filepath_to_url_format
570{
571 my ($filepath) = @_;
[28375]572 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[24971]573 # Only need to worry about Windows, as Unix style directories already in url-format
574 # Convert Windows style \ => /
575 $filepath =~ s@\\@/@g;
576 }
577 return $filepath;
578}
[24932]579
[25093]580# regex filepaths on windows may include \\ as path separator. Convert \\ to /
581sub filepath_regex_to_url_format
582{
583 my ($filepath) = @_;
[28375]584 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[25093]585 # Only need to worry about Windows, as Unix style directories already in url-format
586 # Convert Windows style \\ => /
587 $filepath =~ s@\\\\@/@g;
588 }
589 return $filepath;
590
591}
[24971]592
[25093]593# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
594# and ignores trailing /
595# returns (file, dirs) dirs will be empty if no subdirs
596sub url_fileparse
597{
598 my ($filepath) = @_;
599 # remove trailing /
600 $filepath =~ s@/$@@;
601 if ($filepath !~ m@/@) {
602 return ($filepath, "");
603 }
604 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
605 return ($file, $dirs);
606
607}
608
609
[10281]610sub filename_within_collection
611{
612 my ($filename) = @_;
613
614 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
615
616 if (defined $collect_dir) {
[23362]617
[15875]618 # if from within GSDLCOLLECTDIR, then remove directory prefix
619 # so source_filename is realative to it. This is done to aid
620 # portability, i.e. the collection can be moved to somewhere
621 # else on the file system and the archives directory will still
622 # work. This is needed, for example in the applet version of
623 # GLI where GSDLHOME/collect on the server will be different to
624 # the collect directory of the remove user. Of course,
625 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
626 # it back into a full pathname.
[23362]627
628 $filename = filename_within_directory($filename,$collect_dir);
[10281]629 }
630
631 return $filename;
632}
633
[23362]634sub prettyprint_file
635{
[23484]636 my ($base_dir,$file,$gli) = @_;
[23362]637
[27303]638 my $filename_full_path = &FileUtils::filenameConcatenate($base_dir,$file);
[23362]639
[28375]640 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]641 require Win32;
642
643 # For some reason base_dir in the form c:/a/b/c
644 # This leads to confusion later on, so turn it back into
645 # the more usual Windows form
646 $base_dir =~ s/\//\\/g;
647 my $long_base_dir = Win32::GetLongPathName($base_dir);
648 my $long_full_path = Win32::GetLongPathName($filename_full_path);
649
650 $file = filename_within_directory($long_full_path,$long_base_dir);
[23484]651 $file = encode("utf8",$file) if ($gli);
[23362]652 }
653
654 return $file;
655}
656
657
658sub upgrade_if_dos_filename
659{
[23371]660 my ($filename_full_path,$and_encode) = @_;
[23362]661
[28375]662 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23362]663 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
664 # to its long (Windows) version
[23416]665 my $long_filename = Win32::GetLongPathName($filename_full_path);
666 if (defined $long_filename) {
667 $filename_full_path = $long_filename;
668 }
[23362]669 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
[23483]670 $filename_full_path =~ s/^(.):/\u$1:/;
[23371]671 if ((defined $and_encode) && ($and_encode)) {
672 $filename_full_path = encode("utf8",$filename_full_path);
673 }
[23362]674 }
675
676 return $filename_full_path;
677}
678
679
[23388]680sub downgrade_if_dos_filename
681{
682 my ($filename_full_path) = @_;
683
[28375]684 if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
[23388]685 require Win32;
686
687 # Ensure the given long Windows filename is in a form that can
688 # be opened by Perl => convert it to a short DOS-like filename
689
[23414]690 my $short_filename = Win32::GetShortPathName($filename_full_path);
691 if (defined $short_filename) {
692 $filename_full_path = $short_filename;
693 }
[23416]694 # Make sure initial drive letter is lower-case (to fit in
695 # with rest of Greenstone)
[23483]696 $filename_full_path =~ s/^(.):/\u$1:/;
[23388]697 }
698
699 return $filename_full_path;
700}
701
[23561]702sub block_filename
703{
704 my ($block_hash,$filename) = @_;
705
[28375]706 if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) {
[23561]707
708 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
709 my $lower_filename = lc($filename);
710 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
711# my $lower_drive = $filename;
712# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
713
714# my $upper_drive = $filename;
715# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
716#
717# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
718# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
719 }
720 else {
721 $block_hash->{'file_blocks'}->{$filename} = 1;
722 }
723}
[23388]724
[23561]725
[18441]726sub filename_is_absolute
727{
[27303]728 warnings::warnif("deprecated", "util::filename_is_absolute() is deprecated, using FileUtils::isFilenameAbsolute() instead");
729 return &FileUtils::isFilenameAbsolute(@_);
[18441]730}
731
732
[17572]733## @method make_absolute()
734#
735# Ensure the given file path is absolute in respect to the given base path.
736#
737# @param $base_dir A string denoting the base path the given dir must be
738# absolute to.
739# @param $dir The directory to be made absolute as a string. Note that the
740# dir may already be absolute, in which case it will remain
741# unchanged.
742# @return The now absolute form of the directory as a string.
743#
744# @author John Thompson, DL Consulting Ltd.
745# @copy 2006 DL Consulting Ltd.
746#
747#used in buildcol.pl, doesn't work for all cases --kjdon
748sub make_absolute {
749
750 my ($base_dir, $dir) = @_;
[18441]751### print STDERR "dir = $dir\n";
[17572]752 $dir =~ s/[\\\/]+/\//g;
753 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
754 $dir =~ s|^/tmp_mnt||;
755 1 while($dir =~ s|/[^/]*/\.\./|/|g);
756 $dir =~ s|/[.][.]?/|/|g;
757 $dir =~ tr|/|/|s;
[18441]758### print STDERR "dir = $dir\n";
[17572]759
760 return $dir;
761}
762## make_absolute() ##
[10281]763
[7929]764sub get_dirsep {
765
[28375]766 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[7929]767 return "\\";
768 } else {
769 return "\/";
770 }
771}
772
[619]773sub get_os_dirsep {
[4]774
[28375]775 if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
[619]776 return "\\\\";
777 } else {
778 return "\\\/";
779 }
780}
781
782sub get_re_dirsep {
783
784 return "\\\\|\\\/";
785}
786
787
[15003]788sub get_dirsep_tail {
789 my ($filename) = @_;
790
791 # returns last part of directory or filename
792 # On unix e.g. a/b.d => b.d
793 # a/b/c => c
794
[15088]795 my $dirsep = get_re_dirsep();
796 my @dirs = split (/$dirsep/, $filename);
797 my $tail = pop @dirs;
[15003]798
[15088]799 # - caused problems under windows
800 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
801
[15003]802 return $tail;
803}
804
805
[4]806# if this is running on windows we want binaries to end in
807# .exe, otherwise they don't have to end in any extension
808sub get_os_exe {
[28375]809 return ".exe" if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin"));
[4]810 return "";
811}
812
813
[86]814# test to see whether this is a big or little endian machine
[15713]815sub is_little_endian
816{
817 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
818 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
819 # Otherwise, it's little endian
820
821 #return 0 if $^O =~ /^darwin$/i;
[17714]822 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
823
824 # Going back to stating exactly whether the machine is little endian
825 # or big endian, without any special case for Macs. Since for rata it comes
826 # back with little endian and for shuttle with bigendian.
[15713]827 return (ord(substr(pack("s",1), 0, 1)) == 1);
[86]828}
[4]829
[86]830
[135]831# will return the collection name if successful, "" otherwise
832sub use_collection {
[1454]833 my ($collection, $collectdir) = @_;
[135]834
[1454]835 if (!defined $collectdir || $collectdir eq "") {
[27303]836 $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect");
[1454]837 }
838
[28211]839 if (!defined $ENV{'GREENSTONEHOME'}) { # for GS3, would have been defined in use_site_collection, to GSDL3HOME
840 $ENV{'GREENSTONEHOME'} = $ENV{'GSDLHOME'};
841 }
842
[135]843 # get and check the collection
844 if (!defined($collection) || $collection eq "") {
845 if (defined $ENV{'GSDLCOLLECTION'}) {
846 $collection = $ENV{'GSDLCOLLECTION'};
847 } else {
[2359]848 print STDOUT "No collection specified\n";
[135]849 return "";
850 }
851 }
852
853 if ($collection eq "modelcol") {
[2359]854 print STDOUT "You can't use modelcol.\n";
[135]855 return "";
856 }
857
858 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
859 # are defined
[17204]860 $ENV{'GSDLCOLLECTION'} = $collection;
[28211]861 $ENV{'GSDLCOLLECTHOME'} = $collectdir;
[27303]862 $ENV{'GSDLCOLLECTDIR'} = &FileUtils::filenameConcatenate($collectdir, $collection);
[135]863
864 # make sure this collection exists
865 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
[2359]866 print STDOUT "Invalid collection ($collection).\n";
[135]867 return "";
868 }
869
870 # everything is ready to go
871 return $collection;
872}
873
[21207]874sub get_current_collection_name {
875 return $ENV{'GSDLCOLLECTION'};
876}
[14926]877
878
879# will return the collection name if successful, "" otherwise.
880# Like use_collection (above) but for greenstone 3 (taking account of site level)
881
882sub use_site_collection {
883 my ($site, $collection, $collectdir) = @_;
884
885 if (!defined $collectdir || $collectdir eq "") {
886 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
[27303]887 $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'}, "sites", $site, "collect");
[14926]888 }
889
[28211]890 if (defined $ENV{'GSDL3HOME'}) {
891 $ENV{'GREENSTONEHOME'} = $ENV{'GSDL3HOME'};
892 $ENV{'SITEHOME'} = &FileUtils::filenameConcatenate($ENV{'GREENSTONEHOME'}, "sites", $site);
893 } elsif (defined $ENV{'GSDL3SRCHOME'}) {
894 $ENV{'GREENSTONEHOME'} = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "web");
895 $ENV{'SITEHOME'} = &FileUtils::filenameConcatenate($ENV{'GREENSTONEHOME'}, "sites", $site);
896 } else {
897 print STDERR "*** util::use_site_collection(). Warning: Neither GSDL3HOME nor GSDL3SRCHOME set.\n";
898 }
899
[14926]900 # collectdir explicitly set by this point (using $site variable if required).
901 # Can call "old" gsdl2 use_collection now.
902
903 return use_collection($collection,$collectdir);
904}
905
906
907
[15018]908sub locate_config_file
909{
910 my ($file) = @_;
911
912 my $locations = locate_config_files($file);
913
914 return shift @$locations; # returns undef if 'locations' is empty
915}
916
917
918sub locate_config_files
919{
920 my ($file) = @_;
921
922 my @locations = ();
923
924 if (-e $file) {
925 # Clearly specified (most likely full filename)
926 # No need to hunt in 'etc' directories, return value unchanged
927 push(@locations,$file);
928 }
929 else {
930 # Check for collection specific one before looking in global GSDL 'etc'
[16969]931 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
932 my $test_collect_etc_filename
[27303]933 = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"etc", $file);
[16969]934
935 if (-e $test_collect_etc_filename) {
936 push(@locations,$test_collect_etc_filename);
937 }
[15018]938 }
939 my $test_main_etc_filename
[27303]940 = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"etc", $file);
[15018]941 if (-e $test_main_etc_filename) {
942 push(@locations,$test_main_etc_filename);
943 }
944 }
945
946 return \@locations;
947}
948
949
[9955]950sub hyperlink_text
951{
952 my ($text) = @_;
953
954 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
955 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
956
957 return $text;
958}
959
960
[16436]961# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
962# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
[27303]963sub is_dir_empty {
964 warnings::warnif("deprecated", "util::is_dir_empty() is deprecated, using FileUtils::isDirectoryEmpty() instead");
965 return &FileUtils::isDirectoryEmpty(@_);
[16436]966}
967
[18337]968# Returns the given filename converted using either URL encoding or base64
969# encoding, as specified by $rename_method. If the given filename has no suffix
[20413]970# (if it is just the tailname), then $no_suffix should be some defined value.
971# rename_method can be url, none, base64
[18319]972sub rename_file {
[18337]973 my ($filename, $rename_method, $no_suffix) = @_;
[18329]974
[18337]975 if(!$filename) { # undefined or empty string
[18329]976 return $filename;
977 }
[18319]978
[20413]979 if (!$rename_method) {
980 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
981 # Debugging information
[22856]982 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
983 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
[20413]984 $rename_method = "url";
985 } elsif($rename_method eq "none") {
986 return $filename; # would have already been renamed
987 }
988
[19762]989 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
990 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
991 ###$filename =~ s/ /_/g;
[18337]992
993 my ($tailname,$dirname,$suffix);
994 if($no_suffix) { # given a tailname, no suffix
995 ($tailname,$dirname) = File::Basename::fileparse($filename);
996 }
997 else {
998 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
999 }
[23388]1000 if (!$suffix) {
1001 $suffix = "";
1002 }
[26973]1003 # This breaks GLI matching extracted metadata to files in Enrich panel, as
1004 # original is eg .JPG while gsdlsourcefilename ends up .jpg
1005 # Not sure why it was done in first place...
1006 #else {
1007 #$suffix = lc($suffix);
1008 #}
[18337]1009
[20413]1010 if ($rename_method eq "url") {
[18319]1011 $tailname = &unicode::url_encode($tailname);
1012 }
1013 elsif ($rename_method eq "base64") {
[18341]1014 $tailname = &unicode::base64_encode($tailname);
[18319]1015 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1016 }
[18326]1017
[18319]1018 $filename = "$tailname$suffix";
[18326]1019 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
[18319]1020
1021 return $filename;
1022}
1023
[21616]1024
1025# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
[21664]1026sub rename_ldb_or_bdb_file {
[18657]1027 my ($filename_no_ext) = @_;
1028
1029 my $new_filename = "$filename_no_ext.gdb";
[21615]1030 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
[18657]1031 # try ldb
1032 my $old_filename = "$filename_no_ext.ldb";
1033
1034 if (-f $old_filename) {
[19056]1035 print STDERR "Renaming $old_filename to $new_filename\n";
1036 rename ($old_filename, $new_filename)
1037 || print STDERR "Rename failed: $!\n";
[18657]1038 return;
1039 }
1040 # try bdb
1041 $old_filename = "$filename_no_ext.bdb";
1042 if (-f $old_filename) {
[19056]1043 print STDERR "Renaming $old_filename to $new_filename\n";
1044 rename ($old_filename, $new_filename)
1045 || print STDERR "Rename failed: $!\n";
[18657]1046 return;
1047 }
1048}
1049
[24874]1050sub os_dir() {
1051
1052 my $gsdlarch = "";
1053 if(defined $ENV{'GSDLARCH'}) {
1054 $gsdlarch = $ENV{'GSDLARCH'};
1055 }
1056 return $ENV{'GSDLOS'}.$gsdlarch;
1057}
[18657]1058
[21719]1059# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1060# By default, /greenstone3 for GS3 or /greenstone for GS2.
1061sub get_greenstone_url_prefix() {
1062 # if already set on a previous occasion, just return that
1063 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1064 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
[18657]1065
[21719]1066 my ($configfile, $urlprefix, $defaultUrlprefix);
1067 my @propertynames = ();
1068
1069 if($ENV{'GSDL3SRCHOME'}) {
1070 $defaultUrlprefix = "/greenstone3";
[27303]1071 $configfile = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
[21719]1072 push(@propertynames, qw/path\s*\=/);
1073 } else {
1074 $defaultUrlprefix = "/greenstone";
[27303]1075 $configfile = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
[21719]1076 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1077 }
1078
1079 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1080
1081 if(!$urlprefix) { # no values found for URL prefix, use default values
1082 $urlprefix = $defaultUrlprefix;
1083 } else {
1084 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1085 $urlprefix =~ s/^\///; # remove the starting slash
1086 my @dirs = split(/(\\|\/)/, $urlprefix);
1087 $urlprefix = shift(@dirs);
1088
1089 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1090 $urlprefix = "/$urlprefix";
1091 }
1092 }
1093
1094 # set for the future
1095 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1096# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1097 return $urlprefix;
1098}
1099
1100
1101# Given a config file (xml or java properties file) and a list/array of regular expressions
1102# that represent property names to match on, this function will return the value for the 1st
1103# matching property name. If the return value is undefined, no matching property was found.
1104sub extract_propvalue_from_file() {
1105 my ($configfile, $propertynames) = @_;
1106
1107 my $value;
1108 unless(open(FIN, "<$configfile")) {
1109 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1110 return $value; # not initialised
1111 }
1112
1113 # Read the entire file at once, as one single line, then close it
1114 my $filecontents;
1115 {
1116 local $/ = undef;
1117 $filecontents = <FIN>;
1118 }
1119 close(FIN);
1120
1121 foreach my $regex (@$propertynames) {
1122 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1123 if($value) {
1124 $value =~ s/^\"//; # remove any startquotes
1125 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1126 last; # found value for a matching property, break from loop
1127 }
1128 }
1129
1130 return $value;
1131}
1132
[23306]1133# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1134# given that perllib is in @INC in order to invoke this subroutine.
1135# Call as follows -- after setting up INC to include perllib and
1136# after setting up GSDLHOME and GSDLOS:
1137#
1138# require util;
1139# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1140#
1141sub setup_greenstone_env() {
1142 my ($GSDLHOME, $GSDLOS) = @_;
1143
1144 #my %env_map = ();
1145 # Get the localised ENV settings of running a localised source setup.bash
[23314]1146 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1147 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1148 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
[28375]1149 if (($GSDLOS =~ m/windows/i) && ($^O ne "cygwin")) {
[23314]1150 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1151 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
[23306]1152 }
1153 if (!open(PIN, "$perl_command |")) {
1154 print STDERR ("Unable to execute command: $perl_command. $!\n");
[24563]1155 }
[23306]1156
1157 while (defined (my $perl_output_line = <PIN>)) {
1158 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1159 #$env_map{$key}=$value;
1160 $ENV{$key}=$value;
1161 }
[24563]1162 close (PIN);
1163
[23306]1164 # If any keys in $ENV don't occur in Greenstone's localised env
1165 # (stored in $env_map), delete those entries from $ENV
1166 #foreach $key (keys %ENV) {
1167 # if(!defined $env_map{$key}) {
1168 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1169 # delete $ENV{$key}; # del $ENV(key, value) pair
1170 # }
1171 #}
1172 #undef %env_map;
1173}
1174
[24362]1175sub get_perl_exec() {
1176 my $perl_exec = $^X; # may return just "perl"
1177
1178 if($ENV{'PERLPATH'}) {
[27303]1179 # OR: # $perl_exec = &FileUtils::filenameConcatenate($ENV{'PERLPATH'},"perl");
[28375]1180 if (($ENV{'GSDLOS'} =~ m/windows/) && ($^O ne "cygwin")) {
[24362]1181 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1182 } else {
1183 $perl_exec = "$ENV{'PERLPATH'}/perl";
1184 }
1185 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1186 # containing the full path to the current perl executable we're using
1187 $perl_exec = $Config{perlpath}; # configured path for perl
1188 if (!-e $perl_exec) { # may not point to location on this machine
1189 $perl_exec = $^X; # may return just "perl"
1190 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1191 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1192 }
1193 }
1194 }
1195
1196 return $perl_exec;
1197}
1198
[25533]1199# returns the path to the java command in the JRE included with GS (if any),
1200# quoted to safeguard any spaces in this path, otherwise a simple java
1201# command is returned which assumes and will try for a system java.
[25512]1202sub get_java_command {
1203 my $java = "java";
1204 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1205 # after running setup.bat or from GLI which also runs setup.bat
[27303]1206 my $java_bin = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"packages","jre","bin");
[25512]1207 if(-d $java_bin) {
[27303]1208 $java = &FileUtils::filenameConcatenate($java_bin,"java");
[25533]1209 $java = "\"".$java."\""; # quoted to preserve spaces in path
[25512]1210 }
1211 }
1212 return $java;
1213}
[24362]1214
[25512]1215
[25577]1216# Given the qualified collection name (colgroup/collection),
1217# returns the collection and colgroup parts
1218sub get_collection_parts {
1219 # http://perldoc.perl.org/File/Basename.html
1220 # my($filename, $directories, $suffix) = fileparse($path);
1221 # "$directories contains everything up to and including the last directory separator in the $path
1222 # including the volume (if applicable). The remainder of the $path is the $filename."
1223 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);
1224
1225 my $qualified_collection = shift(@_);
1226
1227 # Since activate.pl can be launched from the command-line, including by a user,
1228 # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1229 # Also allow for the accidental inclusion of multiple slashes
1230 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1231
1232 if(!defined $collection) {
1233 $collection = $colgroup;
1234 $colgroup = "";
1235 }
1236 return ($collection, $colgroup);
1237}
1238
1239# work out the "collectdir/collection" location
1240sub resolve_collection_dir {
1241 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1242
[28211]1243 if (defined $ENV{'GSDLCOLLECTDIR'}) { # a predefined collection dir exists
1244 return $ENV{'GSDLCOLLECTDIR'};
1245 }
1246
[25577]1247 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);
1248
[28175]1249 if (!defined $collect_dir || !$collect_dir) { # if undefined or empty string
1250 $collect_dir = &util::get_working_collect_dir($site);
[25577]1251 }
[28175]1252
1253 return &FileUtils::filenameConcatenate($collect_dir,$colgroup,$collection);
1254}
1255
1256# work out the full path to "collect" of this greenstone 2/3 installation
1257sub get_working_collect_dir {
[28211]1258 my ($site) = @_;
1259
1260 if (defined $ENV{'GSDLCOLLECTHOME'}) { # a predefined collect dir exists
1261 return $ENV{'GSDLCOLLECTHOME'};
[28175]1262 }
[28211]1263
1264 if (defined $site && $site) { # site non-empty, so get default collect dir for GS3
1265
1266 if (defined $ENV{'GSDL3HOME'}) {
1267 return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site,"collect"); # web folder
1268 }
1269 elsif (defined $ENV{'GSDL3SRCHOME'}) {
1270 return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites",$site,"collect");
1271 }
1272 }
1273
[28213]1274 elsif (defined $ENV{'SITEHOME'}) {
[28211]1275 return &FileUtils::filenameConcatenate($ENV{'SITEHOME'},"collect");
[28177]1276 }
1277
1278 else { # get default collect dir for GS2
1279 return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"collect");
1280 }
1281}
1282
[28211]1283sub is_abs_path_any_os {
[28177]1284 my ($path) = @_;
1285
[28211]1286 # We can have filenames in our DBs that were produced on other OS, so this method exists
1287 # to help identify absolute paths in such cases.
1288
[28177]1289 return 1 if($path =~ m@^/@); # full paths begin with forward slash on linux/mac
1290 return 1 if($path =~ m@^([a-zA-Z]\:|\\)@); # full paths begin with drive letter colon for Win or \ for volume, http://stackoverflow.com/questions/13011013/get-only-volume-name-from-filepath
1291
1292 return 0;
1293}
1294
1295
1296# This subroutine is for improving portability of Greenstone collections from one OS to another,
1297# to be used to convert absolute paths going into db files into paths with placeholders instead.
1298# This sub works with util::get_common_gs_paths and takes a path to a greenstone file and, if it's
1299# an absolute path, then it will replace the longest matching greenstone-path prefix of the given
1300# path with a placeholder to match.
1301# The Greenstone-path prefixes that can be matched are the following common Greenstone paths:
1302# the path to the current (specific) collection, the path to the general GS collect directory,
1303# the path to the site directory if GS3, else the path to the GSDLHOME/GSDL3HOME folder.
1304# The longest matching prefix will be replaced with the equivalent placeholder:
1305# @THISCOLLECTPATH@, else @COLLECTHOME@, else @SITEHOME@, else @GSDLHOME@.
1306sub abspath_to_placeholders {
1307 my $path = shift(@_); # path to convert from absolute to one with placeholders
[28236]1308 my $opt_long_or_short_winfilenames = shift(@_) || "short"; # whether we want to force use of long file names even on windows, default uses short
[28177]1309
[28211]1310 return $path unless is_abs_path_any_os($path); # path is relative
[28236]1311
1312 if ($opt_long_or_short_winfilenames eq "long") {
1313 $path = &util::upgrade_if_dos_filename($path); # will only do something on windows
1314 }
[28225]1315
[28177]1316 # now we know we're dealing with absolute paths and have to replace gs prefixes with placeholders
[28213]1317 my @gs_paths = ($ENV{'GSDLCOLLECTDIR'}, $ENV{'GSDLCOLLECTHOME'}, $ENV{'SITEHOME'}, $ENV{'GREENSTONEHOME'}); # list in this order: from longest to shortest path
[28177]1318
[28211]1319 my %placeholder_map = ($ENV{'GREENSTONEHOME'} => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1320 $ENV{'GSDLCOLLECTHOME'} => '@COLLECTHOME@',
1321 $ENV{'GSDLCOLLECTDIR'} => '@THISCOLLECTPATH@'
[28177]1322 );
[28213]1323 $placeholder_map{$ENV{'SITEHOME'}} = '@SITEHOME@' if defined $ENV{'SITEHOME'};
[28177]1324
[28228]1325 $path = &util::_abspath_to_placeholders($path, \@gs_paths, \%placeholder_map);
[28177]1326
[28236]1327 if ($ENV{'GSDLOS'} =~ /^windows$/i && $opt_long_or_short_winfilenames eq "short") {
[28228]1328 # for windows need to look for matches on short file names too
1329 # matched paths are again to be replaced with the usual placeholders
1330
1331 my $gsdlcollectdir = &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTDIR'});
1332 my $gsdlcollecthome = &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTHOME'});
1333 my $sitehome = (defined $ENV{'SITEHOME'}) ? &util::downgrade_if_dos_filename($ENV{'SITEHOME'}) : undef;
1334 my $greenstonehome = &util::downgrade_if_dos_filename($ENV{'GREENSTONEHOME'});
1335
1336 @gs_paths = ($gsdlcollectdir, $gsdlcollecthome, $sitehome, $greenstonehome); # order matters
1337
1338 %placeholder_map = ($greenstonehome => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1339 $gsdlcollecthome => '@COLLECTHOME@',
1340 $gsdlcollectdir => '@THISCOLLECTPATH@'
1341 );
1342 $placeholder_map{$sitehome} = '@SITEHOME@' if defined $sitehome;
1343
1344 $path = &util::_abspath_to_placeholders($path, \@gs_paths, \%placeholder_map);
1345 }
1346
1347 return $path;
1348}
1349
1350sub _abspath_to_placeholders {
1351 my ($path, $gs_paths_ref, $placeholder_map_ref) = @_;
1352
[28177]1353 # The sequence of elements in @gs_paths matters
1354 # Need to loop starting from the *longest* matching path (the path to the specific collection)
1355 # to the shortest matching path (the path to gsdlhome/gsdl3home folder):
1356
[28228]1357 foreach my $gs_path (@$gs_paths_ref) {
[28213]1358 next if(!defined $gs_path); # site undefined for GS2
[28211]1359
[28177]1360 my $re_path = &util::filename_to_regex($gs_path); # escape for regex
1361
[28211]1362 if($path =~ m/^$re_path/i) { # case sensitive or not for OS?
[28177]1363
[28228]1364 my $placeholder = $placeholder_map_ref->{$gs_path}; # get the placeholder to replace the matched path with
[28177]1365
1366 $path =~ s/^$re_path/$placeholder/; #case sensitive or not?
1367 #$path =~ s/^[\\\/]//; # remove gs_path's trailing separator left behind at the start of the path
1368 last; # done
1369 }
1370 }
1371
1372 return $path;
1373}
1374
1375# Function that does the reverse of the util::abspath_to_placeholders subroutine
1376# Once again, call this with the values returned from util::get_common_gs_paths
1377sub placeholders_to_abspath {
1378 my $path = shift(@_); # path that can contain placeholders to convert to resolved absolute path
[28236]1379 my $opt_long_or_short_winfilenames = shift(@_) || "short"; # whether we want to force use of long file names even on windows, default uses short
[28177]1380
1381 return $path if($path !~ m/@/); # path contains no placeholders
1382
[28213]1383 # replace placeholders with gs prefixes
[28177]1384 my @placeholders = ('@THISCOLLECTPATH@', '@COLLECTHOME@', '@SITEHOME@', '@GSDLHOME@'); # order of paths not crucial in this case,
1385 # but listed here from longest to shortest once placeholders are have been resolved
1386
[28213]1387 # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
[28228]1388 my %placeholder_to_gspath_map;
[28236]1389 if ($ENV{'GSDLOS'} =~ /^windows$/i && $opt_long_or_short_winfilenames eq "short") {
[28228]1390 # always replace placeholders with short file names of the absolute paths on windows?
1391 %placeholder_to_gspath_map = ('@GSDLHOME@' => &util::downgrade_if_dos_filename($ENV{'GREENSTONEHOME'}),
1392 '@COLLECTHOME@' => &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTHOME'}),
1393 '@THISCOLLECTPATH@' => &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTDIR'})
1394 );
1395 $placeholder_to_gspath_map{'@SITEHOME@'} = &util::downgrade_if_dos_filename($ENV{'SITEHOME'}) if defined $ENV{'SITEHOME'};
1396 } else {
1397 %placeholder_to_gspath_map = ('@GSDLHOME@' => $ENV{'GREENSTONEHOME'},
1398 '@SITEHOME@' => $ENV{'SITEHOME'}, # can be undef
1399 '@COLLECTHOME@' => $ENV{'GSDLCOLLECTHOME'},
1400 '@THISCOLLECTPATH@' => $ENV{'GSDLCOLLECTDIR'}
1401 ); # $placeholder_to_gspath_map{'@SITEHOME@'} = $ENV{'SITEHOME'} if defined $ENV{'SITEHOME'};
1402 }
[28177]1403
1404 foreach my $placeholder (@placeholders) {
1405 my $gs_path = $placeholder_to_gspath_map{$placeholder};
1406
[28213]1407 next if(!defined $gs_path); # sitehome for GS2 is undefined
[28177]1408
1409 if($path =~ m/^$placeholder/) {
1410 $path =~ s/^$placeholder/$gs_path/;
1411 last; # done
1412 }
1413 }
1414
1415 return $path;
1416}
1417
[25994]1418# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
1419# a directory containing sequentially numbered images.
1420sub create_itemfile
1421{
1422 my ($output_dir, $convert_basename, $convert_to) = @_;
[27303]1423 my $page_num = "";
[25994]1424
[27303]1425 opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";
[25994]1426 my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
[27303]1427 closedir DIR;
[25994]1428
[27303]1429 # Sort files in the directory by page_num
[25994]1430 sub page_number {
1431 my ($dir) = @_;
[27970]1432 my ($pagenum) =($dir =~ m/^.*?[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
1433# my ($pagenum) =($dir =~ m/(\d+)(\.(jpg|gif|png))?$/i); # this works but is not as safe/strict about input filepatterns as the above
[25994]1434
1435 $pagenum = 1 unless defined $pagenum;
1436 return $pagenum;
1437 }
1438
[27303]1439 # sort the files in the directory in the order of page_num rather than lexically.
[25994]1440 @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
1441
1442 # work out if the numbering of the now sorted image files starts at 0 or not
1443 # by checking the number of the first _image_ file (skipping item files)
1444 my $starts_at_0 = 0;
1445 my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
1446 if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
1447 $starts_at_0 = 1;
1448 }
1449
[27303]1450 my $item_file = &FileUtils::filenameConcatenate($output_dir, $convert_basename.".item");
1451 my $item_fh;
1452 &FileUtils::openFileHandle($item_file, 'w', \$item_fh);
1453 print $item_fh "<PagedDocument>\n";
[25994]1454
1455 foreach my $file (@dir_files){
1456 if ($file !~ /\.item/i){
1457 $page_num = page_number($file);
1458 $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
[27303]1459 print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
1460 }
[25994]1461 }
1462
[27303]1463 print $item_fh "</PagedDocument>\n";
1464 &FileUtils::closeFileHandle($item_file, \$item_fh);
[25994]1465 return $item_file;
1466}
1467
[27303]1468
[27374]1469## @function augmentINC()
1470#
1471# Prepend a path (if it exists) onto INC but only if it isn't already in INC
1472# @param $new_path The path to add
1473# @author jmt12
1474#
[27303]1475sub augmentINC
1476{
1477 my ($new_path) = @_;
1478 my $did_add_path = 0;
[27374]1479 # might need to be replaced with FileUtils::directoryExists() call eventually
[27303]1480 if (-d $new_path)
1481 {
1482 my $did_find_path = 0;
1483 foreach my $existing_path (@INC)
1484 {
1485 if ($existing_path eq $new_path)
1486 {
1487 $did_find_path = 1;
1488 last;
1489 }
1490 }
1491 if (!$did_find_path)
1492 {
1493 unshift(@INC, $new_path);
1494 $did_add_path = 1;
1495 }
1496 }
1497 return $did_add_path;
1498}
[27374]1499## augmentINC()
[27303]1500
[27374]1501
[4]15021;
Note: See TracBrowser for help on using the repository browser.