source: main/trunk/greenstone2/perllib/util.pm@ 27326

Last change on this file since 27326 was 27326, checked in by kjdon, 11 years ago

changed warning message to have correct function name

  • Property svn:keywords set to Author Date Id Revision
File size: 40.5 KB
RevLine 
[537]1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package util;
27
[23362]28use strict;
29
30use Encode;
[4]31use File::Copy;
[619]32use File::Basename;
[24362]33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
[27303]37use Config;
38# New module for file related utility functions - intended as a
39# placeholder for an extension that allows a variety of different
40# filesystems (FTP, HTTP, SAMBA, WEBDav, HDFS etc)
41use FileUtils;
[4]42
43# removes files (but not directories)
44sub rm {
[27303]45 warnings::warnif("deprecated", "util::rm() is deprecated, using FileUtils::removeFiles() instead");
46 return &FileUtils::removeFiles(@_);
[4]47}
48
49# recursive removal
[10211]50sub filtered_rm_r {
[27303]51 warnings::warnif("deprecated", "util::filtered_rm_r() is deprecated, using FileUtils::removeFilesFiltered() instead");
52 return &FileUtils::removeFilesFiltered(@_);
[4]53}
54
[10211]55# recursive removal
56sub rm_r {
[27326]57 warnings::warnif("deprecated", "util::rm_r() is deprecated, using FileUtils::removeFilesRecursive() instead");
[27303]58 return &FileUtils::removeFilesRecursive(@_);
[10211]59}
60
[721]61# moves a file or a group of files
62sub mv {
[27303]63 warnings::warnif("deprecated", "util::mv() is deprecated, using FileUtils::moveFiles() instead");
64 return &FileUtils::moveFiles(@_);
[721]65}
66
[25554]67# Move the contents of source directory into target directory
68# (as opposed to merely replacing target dir with the src dir)
69# This can overwrite any files with duplicate names in the target
70# but other files and folders in the target will continue to exist
71sub mv_dir_contents {
[27303]72 warnings::warnif("deprecated", "util::mv_dir_contents() is deprecated, using FileUtils::moveDirectoryContents() instead");
73 return &FileUtils::moveDirectoryContents(@_);
[25554]74}
[721]75
[4]76# copies a file or a group of files
77sub cp {
[27303]78 warnings::warnif("deprecated", "util::cp() is deprecated, using FileUtils::copyFiles() instead");
79 return &FileUtils::copyFiles(@_);
[4]80}
81
82# recursively copies a file or group of files
[1454]83# syntax: cp_r (sourcefiles, destination directory)
84# destination must be a directory - to copy one file to
85# another use cp instead
[4]86sub cp_r {
[27303]87 warnings::warnif("deprecated", "util::cp_r() is deprecated, using FileUtils::copyFilesrecursive() instead");
88 return &FileUtils::copyFilesRecursive(@_);
89}
[4]90
[21762]91# recursively copies a file or group of files
92# syntax: cp_r (sourcefiles, destination directory)
93# destination must be a directory - to copy one file to
94# another use cp instead
95sub cp_r_nosvn {
[27303]96 warnings::warnif("deprecated", "util::cp_r_nosvn() is deprecated, using FileUtils::copyFilesRecursiveNoSVN() instead");
97 return &FileUtils::copyFilesRecursiveNoSVN(@_);
[21762]98}
99
[11179]100# copies a directory and its contents, excluding subdirectories, into a new directory
101sub cp_r_toplevel {
[27303]102 warnings::warnif("deprecated", "util::cp_r_toplevel() is deprecated, using FileUtils::recursiveCopyTopLevel() instead");
103 return &FileUtils::recursiveCopyTopLevel(@_);
[11179]104}
105
[721]106sub mk_dir {
[27303]107 warnings::warnif("deprecated", "util::mk_dir() is deprecated, using FileUtils::makeDirectory() instead");
108 return &FileUtils::makeDirectory(@_);
[721]109}
110
[1046]111# in case anyone cares - I did some testing (using perls Benchmark module)
112# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
113# slightly faster (surprisingly) - Stefan.
[4]114sub mk_all_dir {
[27303]115 warnings::warnif("deprecated", "util::mk_all_dir() is deprecated, using FileUtils::makeAllDirectories() instead");
116 return &FileUtils::makeAllDirectories(@_);
[4]117}
118
[619]119# make hard link to file if supported by OS, otherwise copy the file
120sub hard_link {
[27303]121 warnings::warnif("deprecated", "util::hard_link() is deprecated, using FileUtils::hardLink() instead");
122 return &FileUtils::hardLink(@_);
[619]123}
124
[2193]125# make soft link to file if supported by OS, otherwise copy file
[721]126sub soft_link {
[27303]127 warnings::warnif("deprecated", "util::soft_link() is deprecated, using FileUtils::softLink() instead");
128 return &FileUtils::softLink(@_);
[721]129}
130
[23362]131# Primarily for filenames generated by processing
132# content of HTML files (which are mapped to UTF-8 internally)
133#
134# To turn this into an octet string that really exists on the file
135# system:
136# 1. don't need to do anything special for Unix-based systems
137# (as underlying file system is byte-code)
138# 2. need to map to short DOS filenames for Windows
[721]139
[23362]140sub utf8_to_real_filename
141{
142 my ($utf8_filename) = @_;
[721]143
[23362]144 my $real_filename;
[721]145
[23362]146 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
147 require Win32;
[23388]148
149 print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
150
[23362]151 my $unicode_filename = decode("utf8",$utf8_filename);
152 $real_filename = Win32::GetShortPathName($unicode_filename);
153 }
154 else {
155 $real_filename = $utf8_filename;
156 }
157
158 return $real_filename;
159}
160
[27303]161sub fd_exists {
162 warnings::warnif("deprecated", "util::fd_exists() is deprecated, using FileUtils::fileTest() instead");
163 return &FileUtils::fileTest(@_);
[23362]164}
165
[27303]166sub file_exists {
167 warnings::warnif("deprecated", "util::file_exists() is deprecated, using FileUtils::fileExists() instead");
168 return &FileUtils::fileExists(@_);
[23362]169}
170
[27303]171sub dir_exists {
172 warnings::warnif("deprecated", "util::dir_exists() is deprecated, using FileUtils::directoryExists() instead");
173 return &FileUtils::directoryExists(@_);
[23362]174}
175
[4]176# updates a copy of a directory in some other part of the filesystem
177# verbosity settings are: 0=low, 1=normal, 2=high
178# both $fromdir and $todir should be absolute paths
179sub cachedir {
[27303]180 warnings::warnif("deprecated", "util::cachedir() is deprecated, using FileUtils::synchronizeDirectories() instead");
181 return &FileUtils::synchronizeDirectories(@_);
[4]182}
183
184# this function returns -1 if either file is not found
185# assumes that $file1 and $file2 are absolute file names or
186# in the current directory
187# $file2 is allowed to be newer than $file1
188sub differentfiles {
[27303]189 warnings::warnif("deprecated", "util::differentfiles() is deprecated, using FileUtils::differentFiles() instead");
190 return &FileUtils::differentFiles(@_);
[4]191}
192
193
[16266]194sub get_tmp_filename
195{
196 my $file_ext = shift(@_) || undef;
197
[22438]198 my $opt_dot_file_ext = "";
199 if (defined $file_ext) {
200 if ($file_ext !~ m/\./) {
201 # no dot, so needs one added in at start
202 $opt_dot_file_ext = ".$file_ext"
203 }
204 else {
205 # allow for "extensions" such as _metadata.txt to be handled
206 # gracefully
207 $opt_dot_file_ext = $file_ext;
208 }
209 }
[16266]210
[27303]211 my $tmpdir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "tmp");
[4]212 &mk_all_dir ($tmpdir) unless -e $tmpdir;
213
214 my $count = 1000;
215 my $rand = int(rand $count);
[27303]216 my $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
[16266]217
218 while (-e $full_tmp_filename) {
[4]219 $rand = int(rand $count);
[27303]220 $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
[4]221 $count++;
222 }
[16266]223
224 return $full_tmp_filename;
[4]225}
226
[22886]227sub get_timestamped_tmp_folder
[22873]228{
229
[22886]230 my $tmp_dirname;
[22873]231 if(defined $ENV{'GSDLCOLLECTDIR'}) {
232 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
233 } elsif(defined $ENV{'GSDLHOME'}) {
234 $tmp_dirname = $ENV{'GSDLHOME'};
[22886]235 } else {
236 return undef;
[22873]237 }
238
[27303]239 $tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, "tmp");
[22873]240 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
241
242 # add the timestamp into the path otherwise we can run into problems
243 # if documents have the same name
244 my $timestamp = time;
[27303]245 my $time_tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, $timestamp);
[22873]246 $tmp_dirname = $time_tmp_dirname;
247 my $i = 1;
248 while (-e $tmp_dirname) {
249 $tmp_dirname = "$time_tmp_dirname$i";
250 $i++;
251 }
252 &util::mk_dir($tmp_dirname);
253
[22886]254 return $tmp_dirname;
255}
[22873]256
[22886]257sub get_timestamped_tmp_filename_in_collection
258{
259
260 my ($input_filename, $output_ext) = @_;
261 # derive tmp filename from input filename
262 my ($tailname, $dirname, $suffix)
263 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
264
265 # softlink to collection tmp dir
266 my $tmp_dirname = &util::get_timestamped_tmp_folder();
267 $tmp_dirname = $dirname unless defined $tmp_dirname;
268
[22873]269 # following two steps copied from ConvertBinaryFile
[22886]270 # do we need them?? can't use them as is, as they use plugin methods.
271
[22873]272 #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
273
274 # URLEncode this since htmls with images where the html filename is utf8 don't seem
275 # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
276 # files on the filesystem.
277 #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
278 if (defined $output_ext) {
279 $output_ext = ".$output_ext"; # add the dot
280 } else {
281 $output_ext = $suffix;
282 }
283 $output_ext= lc($output_ext);
[27303]284 my $tmp_filename = &FileUtils::filenameConcatenate($tmp_dirname, "$tailname$output_ext");
[22873]285
286 return $tmp_filename;
287}
288
[21218]289sub get_toplevel_tmp_dir
290{
[27303]291 return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "tmp");
[21218]292}
293
294
[17512]295sub filename_to_regex {
296 my $filename = shift (@_);
[4]297
[24971]298 # need to make single backslashes double so that regex works
[24832]299 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);
[24829]300
[24832]301 # note that the first part of a substitution is a regex, so RE chars need to be escaped,
302 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
[24829]303 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
304 $filename =~ s@\(@\\(@g; # escape brackets
305 $filename =~ s@\)@\\)@g; # escape brackets
[24932]306 $filename =~ s@\[@\\[@g; # escape brackets
307 $filename =~ s@\]@\\]@g; # escape brackets
[24829]308
[17512]309 return $filename;
310}
311
[24829]312sub unregex_filename {
313 my $filename = shift (@_);
314
315 # need to put doubled backslashes for regex back to single
316 $filename =~ s/\\\./\./g; # remove RE syntax for .
317 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
318 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
[24932]319 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
320 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
[24940]321
322 # \\ goes to \
323 # This is the last step in reverse mirroring the order of steps in filename_to_regex()
324 $filename =~ s/\\\\/\\/g; # remove RE syntax for \
[24829]325 return $filename;
326}
327
[4]328sub filename_cat {
[27303]329 # I've disabled this warning for now, as every Greenstone perl
330 # script seems to make use of this function and so you drown in a
331 # sea of deprecated warnings [jmt12]
332# warnings::warnif("deprecated", "util::filename_cat() is deprecated, using FileUtils::filenameConcatenate() instead");
333 return &FileUtils::filenameConcatenate(@_);
[4]334}
335
[21413]336
337sub pathname_cat {
338 my $first_path = shift(@_);
339 my (@pathnames) = @_;
340
341 # If first_path is not null or empty, then add it back into the list
342 if (defined $first_path && $first_path =~ /\S/) {
343 unshift(@pathnames, $first_path);
344 }
345
[21425]346 my $join_char;
[21413]347 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
348 $join_char = ";";
349 } else {
350 $join_char = ":";
351 }
352
353 my $pathname = join($join_char, @pathnames);
354
355 # remove duplicate slashes
356 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
357 $pathname =~ s/[\\\/]+/\\/g;
358 } else {
359 $pathname =~ s/[\/]+/\//g;
360 # DB: want a pathname abc\de.html to remain like this
361 }
362
363 return $pathname;
364}
365
366
[19616]367sub tidy_up_oid {
368 my ($OID) = @_;
369 if ($OID =~ /\./) {
370 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
371 $OID =~ s/\.//g; #remove any periods
372 }
373 if ($OID =~ /^\s.*\s$/) {
374 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
375 # remove starting and trailing whitespace
376 $OID =~ s/^\s+//;
377 $OID =~ s/\s+$//;
378 }
379 if ($OID =~ /^[\d]*$/) {
380 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
381 $OID = "D" . $OID;
382 }
383
384 return $OID;
385}
[26206]386
[10212]387sub envvar_prepend {
388 my ($var,$val) = @_;
389
[26206]390 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
391 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
392
[16404]393 # do not prepend any value/path that's already in the environment variable
[24832]394
395 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
396 if (!defined($ENV{$var})) {
397 $ENV{$var} = "$val";
[16442]398 }
[24832]399 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]400 $ENV{$var} = "$val".$pathsep.$ENV{$var};
[10212]401 }
402}
403
404sub envvar_append {
405 my ($var,$val) = @_;
[26206]406
407 # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
408 my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
[24832]409
[16404]410 # do not append any value/path that's already in the environment variable
[26206]411
[24832]412 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
413 if (!defined($ENV{$var})) {
414 $ENV{$var} = "$val";
[16442]415 }
[24832]416 elsif($ENV{$var} !~ m/$escaped_val/) {
[26206]417 $ENV{$var} = $ENV{$var}.$pathsep."$val";
[24832]418 }
[10212]419}
420
[16442]421
[16380]422# splits a filename into a prefix and a tail extension using the tail_re, or
423# if that fails, splits on the file_extension . (dot)
424sub get_prefix_and_tail_by_regex {
[10212]425
[16380]426 my ($filename,$tail_re) = @_;
427
428 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
429 if ((!defined $file_prefix) || (!defined $file_ext)) {
430 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
431 }
432
433 return ($file_prefix,$file_ext);
434}
435
436# get full path and file only path from a base_dir (which may be empty) and
437# file (which may contain directories)
438sub get_full_filenames {
439 my ($base_dir, $file) = @_;
440
441 my $filename_full_path = $file;
442 # add on directory if present
[27303]443 $filename_full_path = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\S/;
[16380]444
445 my $filename_no_path = $file;
446
447 # remove directory if present
448 $filename_no_path =~ s/^.*[\/\\]//;
449 return ($filename_full_path, $filename_no_path);
450}
451
[8682]452# returns the path of a file without the filename -- ie. the directory the file is in
453sub filename_head {
454 my $filename = shift(@_);
455
456 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
457 $filename =~ s/[^\\\\]*$//;
458 }
459 else {
460 $filename =~ s/[^\\\/]*$//;
461 }
462
463 return $filename;
464}
465
466
[23362]467
[1454]468# returns 1 if filename1 and filename2 point to the same
469# file or directory
470sub filenames_equal {
471 my ($filename1, $filename2) = @_;
472
473 # use filename_cat to clean up trailing slashes and
474 # multiple slashes
[27303]475 $filename1 = &FileUtils::filenameConcatenate($filename1);
476 $filename2 = &FileUtils::filenameConcatenate($filename2);
[1454]477
478 # filenames not case sensitive on windows
479 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
480 $filename1 =~ tr/[A-Z]/[a-z]/;
481 $filename2 =~ tr/[A-Z]/[a-z]/;
482 }
483 return 1 if $filename1 eq $filename2;
484 return 0;
485}
486
[24932]487# If filename is relative to within_dir, returns the relative path of filename to that directory
488# with slashes in the filename returned as they were in the original (absolute) filename.
[23362]489sub filename_within_directory
490{
491 my ($filename,$within_dir) = @_;
492
[23371]493 if ($within_dir !~ m/[\/\\]$/) {
494 my $dirsep = &util::get_dirsep();
[23362]495 $within_dir .= $dirsep;
496 }
497
[24829]498 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets
[23362]499 if ($filename =~ m/^$within_dir(.*)$/) {
500 $filename = $1;
501 }
502
503 return $filename;
504}
505
[24932]506# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
507# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
508# The subpath returned will also be a URL type filename.
509sub filename_within_directory_url_format
510{
511 my ($filename,$within_dir) = @_;
512
513 # convert parameters only to / slashes if Windows
514
[24971]515 my $filename_urlformat = &filepath_to_url_format($filename);
516 my $within_dir_urlformat = &filepath_to_url_format($within_dir);
517
[24932]518 #if ($within_dir_urlformat !~ m/\/$/) {
519 # make sure directory ends with a slash
520 #$within_dir_urlformat .= "/";
521 #}
522
523 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
524
525 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
526
527 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
528 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
529 $filename_urlformat = $1;
530 }
531
532 return $filename_urlformat;
533}
534
[24971]535# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
536# since on Linux it doesn't represent a file separator but an escape char).
537sub filepath_to_url_format
538{
539 my ($filepath) = @_;
540 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
541 # Only need to worry about Windows, as Unix style directories already in url-format
542 # Convert Windows style \ => /
543 $filepath =~ s@\\@/@g;
544 }
545 return $filepath;
546}
[24932]547
[25093]548# regex filepaths on windows may include \\ as path separator. Convert \\ to /
549sub filepath_regex_to_url_format
550{
551 my ($filepath) = @_;
552 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
553 # Only need to worry about Windows, as Unix style directories already in url-format
554 # Convert Windows style \\ => /
555 $filepath =~ s@\\\\@/@g;
556 }
557 return $filepath;
558
559}
[24971]560
[25093]561# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
562# and ignores trailing /
563# returns (file, dirs) dirs will be empty if no subdirs
564sub url_fileparse
565{
566 my ($filepath) = @_;
567 # remove trailing /
568 $filepath =~ s@/$@@;
569 if ($filepath !~ m@/@) {
570 return ($filepath, "");
571 }
572 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
573 return ($file, $dirs);
574
575}
576
577
[10281]578sub filename_within_collection
579{
580 my ($filename) = @_;
581
582 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
583
584 if (defined $collect_dir) {
[23362]585
[15875]586 # if from within GSDLCOLLECTDIR, then remove directory prefix
587 # so source_filename is realative to it. This is done to aid
588 # portability, i.e. the collection can be moved to somewhere
589 # else on the file system and the archives directory will still
590 # work. This is needed, for example in the applet version of
591 # GLI where GSDLHOME/collect on the server will be different to
592 # the collect directory of the remove user. Of course,
593 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
594 # it back into a full pathname.
[23362]595
596 $filename = filename_within_directory($filename,$collect_dir);
[10281]597 }
598
599 return $filename;
600}
601
[23362]602sub prettyprint_file
603{
[23484]604 my ($base_dir,$file,$gli) = @_;
[23362]605
[27303]606 my $filename_full_path = &FileUtils::filenameConcatenate($base_dir,$file);
[23362]607
608 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
609 require Win32;
610
611 # For some reason base_dir in the form c:/a/b/c
612 # This leads to confusion later on, so turn it back into
613 # the more usual Windows form
614 $base_dir =~ s/\//\\/g;
615 my $long_base_dir = Win32::GetLongPathName($base_dir);
616 my $long_full_path = Win32::GetLongPathName($filename_full_path);
617
618 $file = filename_within_directory($long_full_path,$long_base_dir);
[23484]619 $file = encode("utf8",$file) if ($gli);
[23362]620 }
621
622 return $file;
623}
624
625
626sub upgrade_if_dos_filename
627{
[23371]628 my ($filename_full_path,$and_encode) = @_;
[23362]629
630 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
631 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
632 # to its long (Windows) version
[23416]633 my $long_filename = Win32::GetLongPathName($filename_full_path);
634 if (defined $long_filename) {
635 $filename_full_path = $long_filename;
636 }
[23362]637 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
[23483]638 $filename_full_path =~ s/^(.):/\u$1:/;
[23371]639 if ((defined $and_encode) && ($and_encode)) {
640 $filename_full_path = encode("utf8",$filename_full_path);
641 }
[23362]642 }
643
644 return $filename_full_path;
645}
646
647
[23388]648sub downgrade_if_dos_filename
649{
650 my ($filename_full_path) = @_;
651
652 if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
653 require Win32;
654
655 # Ensure the given long Windows filename is in a form that can
656 # be opened by Perl => convert it to a short DOS-like filename
657
[23414]658 my $short_filename = Win32::GetShortPathName($filename_full_path);
659 if (defined $short_filename) {
660 $filename_full_path = $short_filename;
661 }
[23416]662 # Make sure initial drive letter is lower-case (to fit in
663 # with rest of Greenstone)
[23483]664 $filename_full_path =~ s/^(.):/\u$1:/;
[23388]665 }
666
667 return $filename_full_path;
668}
669
[23561]670sub block_filename
671{
672 my ($block_hash,$filename) = @_;
673
674 if ($ENV{'GSDLOS'} =~ m/^windows$/) {
675
676 # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
677 my $lower_filename = lc($filename);
678 $block_hash->{'file_blocks'}->{$lower_filename} = 1;
679# my $lower_drive = $filename;
680# $lower_drive =~ s/^([A-Z]):/\l$1:/i;
681
682# my $upper_drive = $filename;
683# $upper_drive =~ s/^([A-Z]):/\u$1:/i;
684#
685# $block_hash->{'file_blocks'}->{$lower_drive} = 1;
686# $block_hash->{'file_blocks'}->{$upper_drive} = 1;
687 }
688 else {
689 $block_hash->{'file_blocks'}->{$filename} = 1;
690 }
691}
[23388]692
[23561]693
[18441]694sub filename_is_absolute
695{
[27303]696 warnings::warnif("deprecated", "util::filename_is_absolute() is deprecated, using FileUtils::isFilenameAbsolute() instead");
697 return &FileUtils::isFilenameAbsolute(@_);
[18441]698}
699
700
[17572]701## @method make_absolute()
702#
703# Ensure the given file path is absolute in respect to the given base path.
704#
705# @param $base_dir A string denoting the base path the given dir must be
706# absolute to.
707# @param $dir The directory to be made absolute as a string. Note that the
708# dir may already be absolute, in which case it will remain
709# unchanged.
710# @return The now absolute form of the directory as a string.
711#
712# @author John Thompson, DL Consulting Ltd.
713# @copy 2006 DL Consulting Ltd.
714#
715#used in buildcol.pl, doesn't work for all cases --kjdon
716sub make_absolute {
717
718 my ($base_dir, $dir) = @_;
[18441]719### print STDERR "dir = $dir\n";
[17572]720 $dir =~ s/[\\\/]+/\//g;
721 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
722 $dir =~ s|^/tmp_mnt||;
723 1 while($dir =~ s|/[^/]*/\.\./|/|g);
724 $dir =~ s|/[.][.]?/|/|g;
725 $dir =~ tr|/|/|s;
[18441]726### print STDERR "dir = $dir\n";
[17572]727
728 return $dir;
729}
730## make_absolute() ##
[10281]731
[7929]732sub get_dirsep {
733
734 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
735 return "\\";
736 } else {
737 return "\/";
738 }
739}
740
[619]741sub get_os_dirsep {
[4]742
[619]743 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
744 return "\\\\";
745 } else {
746 return "\\\/";
747 }
748}
749
750sub get_re_dirsep {
751
752 return "\\\\|\\\/";
753}
754
755
[15003]756sub get_dirsep_tail {
757 my ($filename) = @_;
758
759 # returns last part of directory or filename
760 # On unix e.g. a/b.d => b.d
761 # a/b/c => c
762
[15088]763 my $dirsep = get_re_dirsep();
764 my @dirs = split (/$dirsep/, $filename);
765 my $tail = pop @dirs;
[15003]766
[15088]767 # - caused problems under windows
768 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
769
[15003]770 return $tail;
771}
772
773
[4]774# if this is running on windows we want binaries to end in
775# .exe, otherwise they don't have to end in any extension
776sub get_os_exe {
777 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
778 return "";
779}
780
781
[86]782# test to see whether this is a big or little endian machine
[15713]783sub is_little_endian
784{
785 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
786 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
787 # Otherwise, it's little endian
788
789 #return 0 if $^O =~ /^darwin$/i;
[17714]790 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
791
792 # Going back to stating exactly whether the machine is little endian
793 # or big endian, without any special case for Macs. Since for rata it comes
794 # back with little endian and for shuttle with bigendian.
[15713]795 return (ord(substr(pack("s",1), 0, 1)) == 1);
[86]796}
[4]797
[86]798
[135]799# will return the collection name if successful, "" otherwise
800sub use_collection {
[1454]801 my ($collection, $collectdir) = @_;
[135]802
[1454]803 if (!defined $collectdir || $collectdir eq "") {
[27303]804 $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect");
[1454]805 }
806
[135]807 # get and check the collection
808 if (!defined($collection) || $collection eq "") {
809 if (defined $ENV{'GSDLCOLLECTION'}) {
810 $collection = $ENV{'GSDLCOLLECTION'};
811 } else {
[2359]812 print STDOUT "No collection specified\n";
[135]813 return "";
814 }
815 }
816
817 if ($collection eq "modelcol") {
[2359]818 print STDOUT "You can't use modelcol.\n";
[135]819 return "";
820 }
821
822 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
823 # are defined
[17204]824 $ENV{'GSDLCOLLECTION'} = $collection;
[27303]825 $ENV{'GSDLCOLLECTDIR'} = &FileUtils::filenameConcatenate($collectdir, $collection);
[135]826
827 # make sure this collection exists
828 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
[2359]829 print STDOUT "Invalid collection ($collection).\n";
[135]830 return "";
831 }
832
833 # everything is ready to go
834 return $collection;
835}
836
[21207]837sub get_current_collection_name {
838 return $ENV{'GSDLCOLLECTION'};
839}
[14926]840
841
842# will return the collection name if successful, "" otherwise.
843# Like use_collection (above) but for greenstone 3 (taking account of site level)
844
845sub use_site_collection {
846 my ($site, $collection, $collectdir) = @_;
847
848 if (!defined $collectdir || $collectdir eq "") {
849 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
[27303]850 $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'}, "sites", $site, "collect");
[14926]851 }
852
853 # collectdir explicitly set by this point (using $site variable if required).
854 # Can call "old" gsdl2 use_collection now.
855
856 return use_collection($collection,$collectdir);
857}
858
859
860
[15018]861sub locate_config_file
862{
863 my ($file) = @_;
864
865 my $locations = locate_config_files($file);
866
867 return shift @$locations; # returns undef if 'locations' is empty
868}
869
870
871sub locate_config_files
872{
873 my ($file) = @_;
874
875 my @locations = ();
876
877 if (-e $file) {
878 # Clearly specified (most likely full filename)
879 # No need to hunt in 'etc' directories, return value unchanged
880 push(@locations,$file);
881 }
882 else {
883 # Check for collection specific one before looking in global GSDL 'etc'
[16969]884 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
885 my $test_collect_etc_filename
[27303]886 = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"etc", $file);
[16969]887
888 if (-e $test_collect_etc_filename) {
889 push(@locations,$test_collect_etc_filename);
890 }
[15018]891 }
892 my $test_main_etc_filename
[27303]893 = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"etc", $file);
[15018]894 if (-e $test_main_etc_filename) {
895 push(@locations,$test_main_etc_filename);
896 }
897 }
898
899 return \@locations;
900}
901
902
[9955]903sub hyperlink_text
904{
905 my ($text) = @_;
906
907 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
908 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
909
910 return $text;
911}
912
913
[16436]914# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
915# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
[27303]916sub is_dir_empty {
917 warnings::warnif("deprecated", "util::is_dir_empty() is deprecated, using FileUtils::isDirectoryEmpty() instead");
918 return &FileUtils::isDirectoryEmpty(@_);
[16436]919}
920
[18337]921# Returns the given filename converted using either URL encoding or base64
922# encoding, as specified by $rename_method. If the given filename has no suffix
[20413]923# (if it is just the tailname), then $no_suffix should be some defined value.
924# rename_method can be url, none, base64
[18319]925sub rename_file {
[18337]926 my ($filename, $rename_method, $no_suffix) = @_;
[18329]927
[18337]928 if(!$filename) { # undefined or empty string
[18329]929 return $filename;
930 }
[18319]931
[20413]932 if (!$rename_method) {
933 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
934 # Debugging information
[22856]935 # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
936 # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
[20413]937 $rename_method = "url";
938 } elsif($rename_method eq "none") {
939 return $filename; # would have already been renamed
940 }
941
[19762]942 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
943 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
944 ###$filename =~ s/ /_/g;
[18337]945
946 my ($tailname,$dirname,$suffix);
947 if($no_suffix) { # given a tailname, no suffix
948 ($tailname,$dirname) = File::Basename::fileparse($filename);
949 }
950 else {
951 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
952 }
[23388]953 if (!$suffix) {
954 $suffix = "";
955 }
[26973]956 # This breaks GLI matching extracted metadata to files in Enrich panel, as
957 # original is eg .JPG while gsdlsourcefilename ends up .jpg
958 # Not sure why it was done in first place...
959 #else {
960 #$suffix = lc($suffix);
961 #}
[18337]962
[20413]963 if ($rename_method eq "url") {
[18319]964 $tailname = &unicode::url_encode($tailname);
965 }
966 elsif ($rename_method eq "base64") {
[18341]967 $tailname = &unicode::base64_encode($tailname);
[18319]968 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
969 }
[18326]970
[18319]971 $filename = "$tailname$suffix";
[18326]972 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
[18319]973
974 return $filename;
975}
976
[21616]977
978# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
[21664]979sub rename_ldb_or_bdb_file {
[18657]980 my ($filename_no_ext) = @_;
981
982 my $new_filename = "$filename_no_ext.gdb";
[21615]983 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
[18657]984 # try ldb
985 my $old_filename = "$filename_no_ext.ldb";
986
987 if (-f $old_filename) {
[19056]988 print STDERR "Renaming $old_filename to $new_filename\n";
989 rename ($old_filename, $new_filename)
990 || print STDERR "Rename failed: $!\n";
[18657]991 return;
992 }
993 # try bdb
994 $old_filename = "$filename_no_ext.bdb";
995 if (-f $old_filename) {
[19056]996 print STDERR "Renaming $old_filename to $new_filename\n";
997 rename ($old_filename, $new_filename)
998 || print STDERR "Rename failed: $!\n";
[18657]999 return;
1000 }
1001}
1002
[24874]1003sub os_dir() {
1004
1005 my $gsdlarch = "";
1006 if(defined $ENV{'GSDLARCH'}) {
1007 $gsdlarch = $ENV{'GSDLARCH'};
1008 }
1009 return $ENV{'GSDLOS'}.$gsdlarch;
1010}
[18657]1011
[21719]1012# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1013# By default, /greenstone3 for GS3 or /greenstone for GS2.
1014sub get_greenstone_url_prefix() {
1015 # if already set on a previous occasion, just return that
1016 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1017 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
[18657]1018
[21719]1019 my ($configfile, $urlprefix, $defaultUrlprefix);
1020 my @propertynames = ();
1021
1022 if($ENV{'GSDL3SRCHOME'}) {
1023 $defaultUrlprefix = "/greenstone3";
[27303]1024 $configfile = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
[21719]1025 push(@propertynames, qw/path\s*\=/);
1026 } else {
1027 $defaultUrlprefix = "/greenstone";
[27303]1028 $configfile = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
[21719]1029 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1030 }
1031
1032 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1033
1034 if(!$urlprefix) { # no values found for URL prefix, use default values
1035 $urlprefix = $defaultUrlprefix;
1036 } else {
1037 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1038 $urlprefix =~ s/^\///; # remove the starting slash
1039 my @dirs = split(/(\\|\/)/, $urlprefix);
1040 $urlprefix = shift(@dirs);
1041
1042 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1043 $urlprefix = "/$urlprefix";
1044 }
1045 }
1046
1047 # set for the future
1048 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1049# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1050 return $urlprefix;
1051}
1052
1053
1054# Given a config file (xml or java properties file) and a list/array of regular expressions
1055# that represent property names to match on, this function will return the value for the 1st
1056# matching property name. If the return value is undefined, no matching property was found.
1057sub extract_propvalue_from_file() {
1058 my ($configfile, $propertynames) = @_;
1059
1060 my $value;
1061 unless(open(FIN, "<$configfile")) {
1062 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1063 return $value; # not initialised
1064 }
1065
1066 # Read the entire file at once, as one single line, then close it
1067 my $filecontents;
1068 {
1069 local $/ = undef;
1070 $filecontents = <FIN>;
1071 }
1072 close(FIN);
1073
1074 foreach my $regex (@$propertynames) {
1075 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1076 if($value) {
1077 $value =~ s/^\"//; # remove any startquotes
1078 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1079 last; # found value for a matching property, break from loop
1080 }
1081 }
1082
1083 return $value;
1084}
1085
[23306]1086# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1087# given that perllib is in @INC in order to invoke this subroutine.
1088# Call as follows -- after setting up INC to include perllib and
1089# after setting up GSDLHOME and GSDLOS:
1090#
1091# require util;
1092# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1093#
1094sub setup_greenstone_env() {
1095 my ($GSDLHOME, $GSDLOS) = @_;
1096
1097 #my %env_map = ();
1098 # Get the localised ENV settings of running a localised source setup.bash
[23314]1099 # and put it into the ENV here. Need to clear GSDLHOME before running setup
1100 #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1101 my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";
[23306]1102 if($GSDLOS =~ m/windows/i) {
[23314]1103 #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1104 $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
[23306]1105 }
1106 if (!open(PIN, "$perl_command |")) {
1107 print STDERR ("Unable to execute command: $perl_command. $!\n");
[24563]1108 }
[23306]1109
1110 while (defined (my $perl_output_line = <PIN>)) {
1111 my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1112 #$env_map{$key}=$value;
1113 $ENV{$key}=$value;
1114 }
[24563]1115 close (PIN);
1116
[23306]1117 # If any keys in $ENV don't occur in Greenstone's localised env
1118 # (stored in $env_map), delete those entries from $ENV
1119 #foreach $key (keys %ENV) {
1120 # if(!defined $env_map{$key}) {
1121 # print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1122 # delete $ENV{$key}; # del $ENV(key, value) pair
1123 # }
1124 #}
1125 #undef %env_map;
1126}
1127
[24362]1128sub get_perl_exec() {
1129 my $perl_exec = $^X; # may return just "perl"
1130
1131 if($ENV{'PERLPATH'}) {
[27303]1132 # OR: # $perl_exec = &FileUtils::filenameConcatenate($ENV{'PERLPATH'},"perl");
[24362]1133 if($ENV{'GSDLOS'} =~ m/windows/) {
1134 $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1135 } else {
1136 $perl_exec = "$ENV{'PERLPATH'}/perl";
1137 }
1138 } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1139 # containing the full path to the current perl executable we're using
1140 $perl_exec = $Config{perlpath}; # configured path for perl
1141 if (!-e $perl_exec) { # may not point to location on this machine
1142 $perl_exec = $^X; # may return just "perl"
1143 if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1144 print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";
1145 }
1146 }
1147 }
1148
1149 return $perl_exec;
1150}
1151
[25533]1152# returns the path to the java command in the JRE included with GS (if any),
1153# quoted to safeguard any spaces in this path, otherwise a simple java
1154# command is returned which assumes and will try for a system java.
[25512]1155sub get_java_command {
1156 my $java = "java";
1157 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1158 # after running setup.bat or from GLI which also runs setup.bat
[27303]1159 my $java_bin = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"packages","jre","bin");
[25512]1160 if(-d $java_bin) {
[27303]1161 $java = &FileUtils::filenameConcatenate($java_bin,"java");
[25533]1162 $java = "\"".$java."\""; # quoted to preserve spaces in path
[25512]1163 }
1164 }
1165 return $java;
1166}
[24362]1167
[25512]1168
[25577]1169# Given the qualified collection name (colgroup/collection),
1170# returns the collection and colgroup parts
1171sub get_collection_parts {
1172 # http://perldoc.perl.org/File/Basename.html
1173 # my($filename, $directories, $suffix) = fileparse($path);
1174 # "$directories contains everything up to and including the last directory separator in the $path
1175 # including the volume (if applicable). The remainder of the $path is the $filename."
1176 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);
1177
1178 my $qualified_collection = shift(@_);
1179
1180 # Since activate.pl can be launched from the command-line, including by a user,
1181 # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1182 # Also allow for the accidental inclusion of multiple slashes
1183 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1184
1185 if(!defined $collection) {
1186 $collection = $colgroup;
1187 $colgroup = "";
1188 }
1189 return ($collection, $colgroup);
1190}
1191
1192# work out the "collectdir/collection" location
1193sub resolve_collection_dir {
1194 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1195
1196 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);
1197
[26976]1198 if (defined $collect_dir && $collect_dir) { # ensure not empty string either
[27303]1199 return &FileUtils::filenameConcatenate($collect_dir,$colgroup, $collection);
[25577]1200 }
[25796]1201 elsif (defined($ENV{'GSDLCOLLECTDIR'})) {
1202 return $ENV{'GSDLCOLLECTDIR'};
1203 }
[25577]1204 else {
1205 if (defined $site) {
[27303]1206 return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site,"collect",$colgroup, $collection);
[25577]1207 }
1208 else {
[27303]1209 return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"collect",$colgroup, $collection);
[25577]1210 }
1211 }
1212}
1213
[25994]1214# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
1215# a directory containing sequentially numbered images.
1216sub create_itemfile
1217{
1218 my ($output_dir, $convert_basename, $convert_to) = @_;
[27303]1219 my $page_num = "";
[25994]1220
[27303]1221 opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";
[25994]1222 my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
[27303]1223 closedir DIR;
[25994]1224
[27303]1225 # Sort files in the directory by page_num
[25994]1226 sub page_number {
1227 my ($dir) = @_;
1228 my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
1229
1230 $pagenum = 1 unless defined $pagenum;
1231 return $pagenum;
1232 }
1233
[27303]1234 # sort the files in the directory in the order of page_num rather than lexically.
[25994]1235 @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
1236
1237 # work out if the numbering of the now sorted image files starts at 0 or not
1238 # by checking the number of the first _image_ file (skipping item files)
1239 my $starts_at_0 = 0;
1240 my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
1241 if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
1242 $starts_at_0 = 1;
1243 }
1244
[27303]1245 my $item_file = &FileUtils::filenameConcatenate($output_dir, $convert_basename.".item");
1246 my $item_fh;
1247 &FileUtils::openFileHandle($item_file, 'w', \$item_fh);
1248 print $item_fh "<PagedDocument>\n";
[25994]1249
1250 foreach my $file (@dir_files){
1251 if ($file !~ /\.item/i){
1252 $page_num = page_number($file);
1253 $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
[27303]1254 print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
1255 }
[25994]1256 }
1257
[27303]1258 print $item_fh "</PagedDocument>\n";
1259 &FileUtils::closeFileHandle($item_file, \$item_fh);
[25994]1260 return $item_file;
1261}
1262
[27303]1263
1264# /** @function augmentINC()
1265# * Prepend a path (if it exists) onto INC but only if it isn't already in
1266# * INC
1267# * @param $new_path The path to add as necessary
1268# * @author jmt12
1269# */
1270sub augmentINC
1271{
1272 my ($new_path) = @_;
1273 my $did_add_path = 0;
1274 # will need to be replaced with FileUtils::directoryExists() call eventually
1275 if (-d $new_path)
1276 {
1277 my $did_find_path = 0;
1278 foreach my $existing_path (@INC)
1279 {
1280 if ($existing_path eq $new_path)
1281 {
1282 $did_find_path = 1;
1283 last;
1284 }
1285 }
1286 if (!$did_find_path)
1287 {
1288 unshift(@INC, $new_path);
1289 $did_add_path = 1;
1290 }
1291 }
1292 return $did_add_path;
1293}
1294# /** augmentINC($new_path) **/
1295
[4]12961;
Note: See TracBrowser for help on using the repository browser.