source: main/trunk/greenstone2/bin/script/build@ 24093

Last change on this file since 24093 was 24093, checked in by sjm84, 13 years ago

Fixing issues with perl finding the wrong perl by making sure it uses the one that is currently running

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 26.1 KB
RevLine 
[1778]1#!/usr/bin/perl -w
[1184]2
[1778]3###########################################################################
4#
5# build --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2000 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
[1198]28# This perl script may be called directly or by running build.bat on
29# windows (build.bat is in bin\windows)
[1184]30
[1762]31# Note that this script has grown over time and now has many options for
32# use when called from within the collector. If it appears to
33# over-complicate things a little, that's why.
34
[23311]35# I think that currently (Nov 2010) its only officially used from the collector
36# and depositor
37
[1454]38package build;
39
[17253]40use strict;
41no strict 'refs';
42
[1438]43use FileHandle;
[1454]44use File::Copy;
[1438]45
[1184]46BEGIN {
[1198]47
48 die "GSDLHOME not set - did you remember to source setup.bash (unix) or " .
49 "run setup.bat (windows)?\n" unless defined $ENV{'GSDLHOME'};
50 die "GSDLOS not set - did you remember to source setup.bash (unix) or " .
51 "run setup.bat (windows)?\n" unless defined $ENV{'GSDLOS'};
[1184]52 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
[1438]53
54 STDOUT->autoflush(1);
55 STDERR->autoflush(1);
[1184]56}
57
[1778]58use lib qq($ENV{'GSDLHOME'}/perllib/cpan);
59use Mail::Sendmail;
[1184]60use parsargv;
61use util;
[1454]62use cfgread;
[22589]63use colcfg;
64use dbutil;
[1184]65
[23313]66# set up greenstone environment
67&util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
68# is the following needed with the previous line??
[1678]69# set up path - this allows for paths not to be supplied to system calls
70# and overcomes problems when GSDLHOME contains spaces (double quoting
71# the call doesn't work on win2k and probably other variants of winnt)
72my $path_separator = ":";
73$path_separator = ";" if $ENV{'GSDLOS'} =~ /^windows$/;
74$ENV{'PATH'} = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}) .
75 $path_separator . &util::filename_cat($ENV{'GSDLHOME'}, "bin", "script") .
76 $path_separator . $ENV{'PATH'};
77
[17253]78# all the input option variables
[22589]79my ($optionfile, $indextype, $append, $manifest, $remove_archives, $remove_import, $buildtype, $infodbtype, $maxdocs, @download, $collectdir, $site, $dontinstall, $save_archives, $out, $make_writable, $log_events, $event_log_file, $email_events, $mail_server, $statsfile, $event_header);
[17253]80
[1454]81&parse_args (\@ARGV);
[1198]82
83my ($collection) = @ARGV;
84
85if (!defined $collection || $collection !~ /\w/) {
86 print STDERR "You must specify a collection to build\n";
87 &print_usage();
88 die "\n";
89}
90
[1454]91if ($optionfile =~ /\w/) {
92 open (OPTIONS, $optionfile) || die "Couldn't open $optionfile\n";
93 my $line = [];
94 my $options = [];
95 while (defined ($line = &cfgread::read_cfg_line ('build::OPTIONS'))) {
96 push (@$options, @$line);
97 }
98 close OPTIONS;
99 &parse_args ($options);
100}
101
[1198]102if ($maxdocs == -1) {
103 $maxdocs = "";
104} else {
105 $maxdocs = "-maxdocs $maxdocs";
106}
107
[1454]108my $cdir = $collectdir;
[22589]109my $gs_mode;
[17253]110if (defined $site && $site =~ /\w/)
[14925]111{
112 die "GSDL3HOME not set." unless $ENV{'GSDL3HOME'};
113 $cdir = &util::filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect") unless $collectdir =~ /\w/;
[22589]114 $gs_mode = "gs3";
[14925]115}
116else
117{
118 $cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect") unless $collectdir =~ /\w/;
[22589]119 $gs_mode = "gs2";
[14925]120}
121
[1454]122my $importdir = &util::filename_cat ($cdir, $collection, "import");
123my $archivedir = &util::filename_cat ($cdir, $collection, "archives");
124my $buildingdir = &util::filename_cat ($cdir, $collection, "building");
125my $indexdir = &util::filename_cat ($cdir, $collection, "index");
[22589]126my $etcdir = &util::filename_cat ($cdir, $collection, "etc");
[1198]127my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin");
128
[1431]129my $use_out = 0;
[1424]130my $outfile = $out;
131if ($out !~ /^(STDERR|STDOUT)$/i) {
132 open (OUT, ">$out") || die "Couldn't open output file $out\n";
133 $out = "OUT";
[1452]134
135 # delete any existing .final file
136 &util::rm ("$outfile.final") if -e "$outfile.final";
137
[1431]138 $use_out = 1;
[1424]139}
140$out->autoflush(1);
141
[1454]142# delete any .kill file left laying around from a previously aborted build
143if (-e &util::filename_cat ($cdir, $collection, ".kill")) {
144 &util::rm (&util::filename_cat ($cdir, $collection, ".kill"));
145}
146
[1762]147# get maintainer email address from main.cfg
148my $maintainer = "NULL";
149my $main_cfg = &util::filename_cat ($ENV{'GSDLHOME'}, "etc", "main.cfg");
150my $cfgdata = &cfgread::read_cfg_file ($main_cfg, "maintainer");
151if (defined $cfgdata->{'maintainer'} && $cfgdata->{'maintainer'} =~ /\w/) {
152 $maintainer = $cfgdata->{'maintainer'};
153}
154# if maintainer is "NULL" email_events should be disabled
155if ($maintainer =~ /^NULL$/i) {
156 $email_events = "";
157}
158
[1184]159&main();
160
[2785]161if ($use_out) {
162 close OUT;
[1424]163
[2785]164 # if we've created a build log we'll copy it to the collection's etc
165 # directory
[22589]166 my ($final_etcdir);
[2785]167 if ($dontinstall) {
[22589]168 $final_etcdir = &util::filename_cat($collectdir, "etc", "build.log");
[2785]169 } else {
[22589]170 $final_etcdir = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "etc", "build.log");
[2785]171 }
172
[22589]173 &util::cp($outfile, $final_etcdir);
[2785]174}
175
[1184]176sub print_usage {
[2359]177 print STDOUT "\n";
178 print STDOUT "build: Builds a Greenstone collection (i.e. runs import.pl and buildcol.pl\n";
179 print STDOUT " then copies the resulting indexes to the correct place).\n\n";
180 print STDOUT " usage: $0 [options] collection-name\n\n";
181 print STDOUT " options:\n";
182 print STDOUT " -optionfile file Get options from file, useful on systems where\n";
183 print STDOUT " long command lines may cause problems\n";
[12003]184 print STDOUT " -indextype mg|mgpp|lucene \n";
185 print STDERR " Specify the type of indexer used in this collection\n";
186 print STDERR " If -append is used then -indextype is needed to \n";
187 print STDERR " determine how to run buildcol.pl as well as update\n";
188 print STDERR " 'building' and 'index' according.\n";
[2359]189 print STDOUT " -append Add new files to existing collection\n";
[12003]190 print STDOUT " -manifest Use manifest.xml file to determine which files to process.\n";
[2359]191 print STDOUT " -remove_archives Remove archives directory after successfully\n";
192 print STDOUT " building the collection.\n";
193 print STDOUT " -remove_import Remove import directory after successfully\n";
194 print STDOUT " importing the collection.\n";
195 print STDOUT " -buildtype build|import If 'build' attempt to build directly\n";
196 print STDOUT " from archives directory (bypassing import\n";
197 print STDOUT " stage). Defaults to 'import'\n";
198 print STDOUT " -maxdocs number Maximum number of documents to build\n";
199 print STDOUT " -download directory Directory (or file) to get import documents from.\n";
200 print STDOUT " There may be multiple download directories and they\n";
201 print STDOUT " may be of type http://, ftp://, or file://\n";
202 print STDOUT " Note that any existing import directory will be\n";
203 print STDOUT " deleted to make way for the downloaded data if\n";
204 print STDOUT " a -download option is supplied\n";
205 print STDOUT " -collectdir directory Collection directory (defaults to " .
[14925]206 &util::filename_cat($ENV{'GSDLHOME'}). "collect for Greenstone2;\n";
207 print STDOUT" for Greenstone3 use -site option and then collectdir default will be\n";
208 print STDOUT " set to the collect folder within that site.)\n";
209 print STDOUT " -site Specify the site within a Greenstone3 installation to use.\n";
[2359]210 print STDOUT " -dontinstall Only applicable if -collectdir is set to something\n";
211 print STDOUT " other than the default. -dontinstall will suppress\n";
212 print STDOUT " the default behaviour which is to install the\n";
213 print STDOUT " collection to the gsdl/collect directory once it has\n";
214 print STDOUT " been built.\n";
215 print STDOUT " -save_archives Create a copy of the existing archives directory\n";
216 print STDOUT " called archives.org\n";
217 print STDOUT " -out Filename or handle to print output status to.\n";
218 print STDOUT " The default is STDERR\n";
[2785]219 print STDOUT " -statsfile name Filename or handle to print import statistics to.\n";
220 print STDOUT " The default is STDERR\n";
[2566]221 print STDOUT " -make_writable If set build will make the collection and any\n";
222 print STDOUT " temporary files it created globally writable after\n";
223 print STDOUT " it finishes\n";
[2359]224 print STDOUT " -log_events Log important events (collection built successfully\n";
225 print STDOUT " etc.) to event_log_file\n";
226 print STDOUT " -event_log_file file File to append important events to (defaults to\n";
227 print STDOUT " " . &util::filename_cat ($ENV{'GSDLHOME'}, "etc", "events.txt") . "\n";
228 print STDOUT " -email_events addr Comma separated list of email addresses to mail\n";
229 print STDOUT " details of important collection building events\n";
230 print STDOUT " -mail_server server The outgoing (SMTP) mail server to be used by\n";
231 print STDOUT " email_events. email_events will be disabled if\n";
232 print STDOUT " mail_server isn't set\n";
233 print STDOUT " -event_header file File containing a header to go on any event\n";
234 print STDOUT " messages. If not specified build will create a\n";
235 print STDOUT " generic header\n\n";
[2469]236 print STDOUT " [Type \"build | more\" if this help text scrolled off your screen]";
[2359]237 print STDOUT "\n" unless $ENV{'GSDLOS'} =~ /^windows$/i;
[1184]238}
239
240sub main {
[1454]241
242 if ($save_archives && -d $archivedir) {
243 print $out "caching original archives to ${archivedir}.org\n";
244 &util::cp_r ($archivedir, "${archivedir}.org");
245 }
246
[1424]247 # do the download thing if we have any -download options
248 if (scalar (@download)) {
249 # remove any existing import data
[1431]250 if (&has_content ($importdir)) {
[23311]251 #print $out "build: WARNING: removing contents of $importdir\n";
252 #&util::rm_r ($importdir);
[1431]253 }
[1454]254
[17253]255 foreach my $download_dir (@download) {
[1507]256
257 # remove any leading or trailing whitespace from filenames (just in case)
258 $download_dir =~ s/^\s+//;
259 $download_dir =~ s/\s+$//;
[1424]260
[1709]261 if ($download_dir =~ /^(http|ftp):\/\//) {
262 # use wget to mirror http or ftp urls
263 # options used are:
264 # -P = the directory to download documents to
265 # -np = don't ascend to parent directories. this means that only documents
266 # that live in the same directory or below on the same server as
267 # the given url will be downloaded
268 # -nv = not too verbose
269 # -r = recursively mirror
270 # -N = use time-stamping to see if an up-to-date local copy of each
271 # file already exists. this may be useful if wget fails and
272 # is restarted
273 # -l inf = infinite recursion depth
274 # -R "*\?*" = don't download cgi based urls
275 # -o = the output file to write download status to (only used if the -out
276 # option was given to build)
[1743]277
[24093]278 #$^X is a special variable containing the full path to the current perl executable we are in
279 my $download_cmd = "$^X -S gsWget.pl -P \"$importdir\" -np -nv";
[1753]280 $download_cmd .= " -r -N -l inf -R \"*\\?*\"";
[1709]281 $download_cmd .= " -o \"$outfile.download\"" if $use_out;
282 $download_cmd .= " \"$download_dir\"";
283 system ($download_cmd);
[1424]284
[1709]285 # note that wget obeys the robot rules. this means that it will have
286 # downloaded a robots.txt file if one was present. since it's unlikely
287 # anyone really wants to include it in a collection we'll delete it.
288 # robots.txt shouldn't be more than two directories deep (I think it will
289 # always be exactly two deep but will look for it in the top directory too)
290 # so that's as far as we'll go looking for it.
291 if (opendir (DIR, $importdir)) {
292 my @files = readdir DIR;
293 closedir DIR;
294 foreach my $file (@files) {
295 next if $file =~ /^\.\.?$/;
296 if ($file =~ /^robots.txt$/i) {
297 &util::rm (&util::filename_cat ($importdir, $file));
298 last;
299 } else {
[1743]300 $file = &util::filename_cat ($importdir, $file);
[1709]301 if (-d $file) {
302 if (opendir (DIR, $file)) {
[1743]303 my @files2 = readdir DIR;
[1709]304 closedir DIR;
[1743]305 foreach my $file2 (@files2) {
306 if ($file2 =~ /^robots.txt$/i) {
307 &util::rm (&util::filename_cat ($file, $file2));
[1709]308 last;
309 }
310 }
311 }
312 }
313 }
314 }
315 }
[1424]316
[1709]317 # if using output directory append the file download output to it
318 &append_file ($out, "$outfile.download");
319
[1424]320 } else {
321 # we assume anything not beginning with http:// or ftp://
322 # is a file or directory on the local file system.
[1485]323 $download_dir =~ s/^file:(\/\/)?//;
[1507]324 $download_dir =~ s/^\s+//; # may be whitespace between "file://" and the rest
[1424]325
326 if (-e $download_dir) {
327 # copy download_dir and all it contains to the import directory
[24093]328 #$^X is a special variable containing the full path to the current perl executable we are in
329 my $download_cmd = "$^X -S filecopy.pl";
[1454]330 $download_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
[14925]331 $download_cmd .= " -site \"$site\"" if $site =~ /\w/;
[1431]332 $download_cmd .= " -out \"$outfile.download\"" if $use_out;
333 $download_cmd .= " \"" . $download_dir . "\" " . $collection;
[14925]334
335 my $download_status = system ($download_cmd);
336 if ($download_status > 0)
337 {
338 die "Failed to execute: $download_cmd\n";
339 }
340
341
[1431]342 # if using output directory append the file download output to it
343 &append_file ($out, "$outfile.download");
[1424]344 } else {
[1507]345 print $out "WARNING: '$download_dir' does not exist\n";
[1424]346 }
347 }
348 }
349 }
350
[14925]351 `echo $importdir ; ls $importdir `;
352
[22589]353 my $col_cfg_file;
354 if ($gs_mode eq "gs3") {
355 $col_cfg_file = &util::filename_cat($etcdir, "collectionConfig.xml");
356 } else {
357 $col_cfg_file = &util::filename_cat($etcdir, "collect.cfg");
358 }
359
360 my $collect_cfg = &colcfg::read_collection_cfg ($col_cfg_file, $gs_mode);
361 # get the database type for this collection from its configuration file (may be undefined)
362 $infodbtype = $collect_cfg->{'infodbtype'} || &dbutil::get_default_infodb_type();
363
364 my $archiveinf_doc_file_path = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
[21567]365 if (-e $archiveinf_doc_file_path) {
[1198]366 if (&has_content ($importdir)) {
[1184]367 if ($buildtype eq "build") {
368 &gsdl_build();
369 } else {
370 &gsdl_import();
371 &gsdl_build();
372 }
373 } else {
374 # there are archives but no import, build directly from archives
[1424]375 print $out "build: no import material was found, building directly\n";
376 print $out " from archives\n";
[1184]377 &gsdl_build();
378 }
379 } else {
[1198]380 if (&has_content ($importdir)) {
[1184]381 if ($buildtype eq "build") {
[1424]382 print $out "build: can't build directly from archives as no\n";
383 print $out " imported archives exist (did you forget to\n";
384 print $out " move the contents of $collection/import to\n";
385 print $out " collection/archives?)\n";
[1184]386 }
387 &gsdl_import();
[21567]388 if (&has_content ($archivedir, "^archiveinf-doc\..*\$")) {
[2381]389 &gsdl_build();
390 } else {
391 my $msg = "build: ERROR: The collection could not be built as no\n";
392 $msg .= " valid data was imported. Are at least some of\n";
393 $msg .= " the files you imported in a format that can be\n";
394 $msg .= " processed by the specified Greenstone plugins?\n";
395 print $out $msg;
396 &log_event ($msg);
397 &final_out (6) if $use_out;
398 die "\n";
399 }
[1184]400 } else {
401 # no import or archives
[2381]402 my $msg = "build: ERROR: The collection could not be built as it contains no data.\n";
[1762]403 print $out $msg;
404 &log_event ($msg);
[1438]405 &final_out (1) if $use_out;
[1184]406 die "\n";
407 }
408 }
[1454]409
410 if ($collectdir ne "" && !$dontinstall) {
[14925]411
412 my $install_collectdir;
413 if (defined $ENV{'GSDL3HOME'})
414 {
415
416 if ((defined $site) && ($site ne ""))
417 {
418 $install_collectdir = &util::filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
419 }
420 else
421 {
422 my $msg = "build: ERROR: Need to specify the site within the Greenstone3 installation.";
423 print $out $msg;
424 &log_event ($msg);
425 &final_out (6) if $use_out;
426 die "\n";
427 }
428 }
429 else
430 {
431 $install_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
432 }
433
[1454]434 if (!&util::filenames_equal ($collectdir, $install_collectdir)) {
435
436 # install collection to gsdl/collect
437 print $out "installing the $collection collection\n";
438 my $newdir = &util::filename_cat ($install_collectdir, $collection);
439 my $olddir = &util::filename_cat ($collectdir, $collection);
440 if (-d $newdir) {
[1762]441 my $msg = "build: Could not install collection as $newdir\n" .
442 " already exists. Collection will remain at\n$olddir\n";
443
444 print $out $msg;
445 &log_event ($msg);
[1454]446 &final_out (4) if $use_out;
447 die "\n";
448 }
449 if (!&File::Copy::move ($olddir, $newdir)) {
[1762]450 my $msg = "build: Failed to install collection to $newdir\n" .
451 " Collection will remain at $olddir\n";
452 print $out $msg;
453 &log_event ($msg);
[1454]454 &final_out (5) if $use_out;
455 die "\n";
456 }
457 }
458 }
459
[1762]460 &log_event ("The $collection collection was built successfully\n");
[1438]461 &final_out (0) if $use_out;
[1184]462}
463
464sub gsdl_import {
465
[1424]466 print $out "importing the $collection collection\n\n";
[1198]467
[24093]468 #$^X is a special variable containing the full path to the current perl executable we are in
469 my $import_cmd = "$^X -S import.pl";
[1431]470 $import_cmd .= " -out \"$outfile.import\"" if $use_out;
[12003]471 if ($append) {
472 $import_cmd .= " -keepold";
[13067]473 if (not $manifest) {
474 # if we are appending, with no manifest, assume incremental
475 $import_cmd .= " -incremental";
476 }
[12003]477 } else {
478 $import_cmd .= " -removeold";
479 }
480
481 $import_cmd .= " -manifest manifest.xml" if ($manifest);
[14925]482 $import_cmd .= " -site \"$site\"" if $site =~ /\w/;
[1454]483 $import_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
[2785]484 $import_cmd .= " -statsfile \"$statsfile\"" if $statsfile =~ /\w/;
[1431]485 $import_cmd .= " $maxdocs $collection";
[2785]486
[1431]487 system ($import_cmd);
488 # if using output directory append the import output to it
489 &append_file ($out, "$outfile.import");
490
[22589]491 my $archiveinf_doc_file_path = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
492
[21567]493 if (-e $archiveinf_doc_file_path) {
[1424]494 print $out "$collection collection imported successfully\n\n";
495 if ($remove_import) {
496 print $out "removing import directory ($importdir)\n";
497 &util::rm_r ($importdir);
498 }
[1184]499 } else {
[22589]500 print $out "$archiveinf_doc_file_path not found. archives contents:\n";
[19485]501 print $out `ls $archivedir`;
502
[1762]503 my $msg = "build: ERROR: import.pl failed\n";
504 print $out "\n$msg";
505 &log_event ($msg);
[1438]506 &final_out (2) if $use_out;
[1454]507 die "\n";
[1184]508 }
509}
510
511sub gsdl_build {
512
[1424]513 print $out "building the $collection collection\n\n";
[1184]514
[24093]515 #$^X is a special variable containing the full path to the current perl executable we are in
516 my $build_cmd = "$^X -S buildcol.pl";
[12003]517
518 my $removeold = 1;
519 if ($append) {
520 if ($indextype eq "lucene") {
[13067]521 $build_cmd .= " -keepold -incremental -builddir $indexdir";
[12003]522 $removeold = 0;
523 }
524 else {
525 $build_cmd .= " -removeold";
526 }
527 }
528 else {
529 $build_cmd .= " -removeold";
530 }
531
[1431]532 $build_cmd .= " -out \"$outfile.build\"" if $use_out;
[14925]533 $build_cmd .= " -site \"$site\"" if $site =~ /\w/;
[1454]534 $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
[1431]535 $build_cmd .= " $maxdocs $collection";
536 system ($build_cmd);
537 # if using output directory append the buildcol output to it
538 &append_file ($out, "$outfile.build");
539
[19485]540 my @db_exts = ( ".ldb", ".bdb", ".gdb", ".db" );
541 my $build_ok = 0;
542 foreach my $db_ext (@db_exts) {
543 if ($removeold && (-e &util::filename_cat ($buildingdir, "text", "$collection$db_ext"))) {
544 $build_ok = 1;
545 last;
546 }
547 if (($removeold==0) && (-e &util::filename_cat ($indexdir, "text", "$collection$db_ext"))) {
548 $build_ok = 1;
549 last;
550 }
551 }
552
553 if ($build_ok) {
[1424]554 print $out "$collection collection built successfully\n\n";
555 if ($remove_archives) {
556 print $out "removing archives directory ($archivedir)\n";
557 &util::rm_r ($archivedir);
558 }
[1184]559 } else {
[1762]560 my $msg = "build: ERROR: buildcol.pl failed\n";
561 print $out "\n$msg";
562 &log_event ($msg);
[1454]563 &final_out (3) if $use_out;
564 die "\n";
[1184]565 }
566
[12003]567 if ($removeold) {
568 # replace old indexes with new ones
569 if (&has_content ($indexdir)) {
570 print $out "removing old indexes\n";
571 &util::rm_r ($indexdir);
572 }
573 rmdir ($indexdir) if -d $indexdir;
574 &File::Copy::move ($buildingdir, $indexdir);
[1184]575 }
[12003]576 else {
[13067]577 # Do nothing. We have built into index dir rather than building dir
[12003]578 }
[1454]579
[13067]580 # remove the cached archives
[1461]581 if ($save_archives && -d "${archivedir}.org") {
[1454]582 &util::rm_r ("${archivedir}.org");
583 }
[1184]584}
585
[2381]586# return 1 if $dir directory contains any files or sub-directories (other
587# than those specified in the $ignore regular expression)
[1184]588sub has_content {
[2381]589 my ($dir, $ignore) = @_;
[1184]590
591 if (!-d $dir) {return 0;}
[2381]592
[1184]593 opendir (DIR, $dir) || return 0;
594 my @files = readdir DIR;
[2488]595 closedir DIR;
[1184]596
597 foreach my $file (@files) {
598 if ($file !~ /^\.{1,2}$/) {
[2381]599 return 1 unless (defined $ignore && $file =~ /$ignore/);
[1184]600 }
601 }
[14925]602
[1184]603 return 0;
604}
[1431]605
606sub append_file {
607 my ($handle, $file) = @_;
608
609 open (FILE, $file) || return;
610 undef $/;
611 print $handle <FILE>;
612 $/ = "\n";
613 close FILE;
614 &util::rm ($file);
615}
[1438]616
[2774]617# creates a file called $outfile.final and writes an output code to it.
[1438]618# An output code of 0 specifies that there was no error
619sub final_out {
620 my ($exit_code) = @_;
621
[2774]622 if ($use_out && (!-e "$outfile.final")) {
623
624 if (open (FINAL, ">$outfile.final")) {
625 print FINAL $exit_code;
626 close FINAL;
627 }
[1438]628 }
629}
[1454]630
[1762]631sub log_event {
632 my ($msg) = @_;
633
634 return unless ($log_events || $email_events);
635
636 # get the event header
[1778]637 my $eheader = "[Build Event]\n";
638 $eheader .= "Date: " . scalar localtime() . "\n";
[1762]639 if ($event_header ne "" && open (HEADER, $event_header)) {
640 undef $/;
[1778]641 $eheader .= <HEADER>;
[1762]642 $/ = "\n";
643 close HEADER;
644 } else {
645 $eheader .= "Collection: $collection\n";
[1778]646 $eheader .= "GSDLHOME: $ENV{'GSDLHOME'}\n";
647 $eheader .= "Build Location: $collectdir\n";
[1762]648 }
649
650 if ($log_events) {
651 my $fail = 0;
652 # append the event to the event log file
653 if ($event_log_file eq "" || !open (LOG, ">>$event_log_file")) {
654 # log file defaults to $GSDLHOME/etc/events.txt
655 $event_log_file = &util::filename_cat ($ENV{'GSDLHOME'}, "etc", "events.txt");
656 if (!open (LOG, ">>$event_log_file")) {
657 print $out "build: ERROR: Couldn't open event log file $event_log_file\n";
658 $fail = 1;
659 }
660 }
661 if (!$fail) {
662 print LOG $eheader;
663 print LOG $msg;
[1778]664 print LOG "\n";
[1762]665 close LOG;
666 }
667 }
668
669 if ($email_events) {
670 # if mail_server isn't set email_events does nothing
671 if ($mail_server eq "") {
672 print $out "build: WARNING: mail_server was not set - email_events option was ignored\n";
673 return;
674 }
675
676 my %mail = ('SMTP' => $mail_server,
677 'To' => $email_events,
678 'From' => $maintainer,
679 'Subject' => 'Greenstone Build Event'
680 );
681 $mail{'Message'} = $eheader . $msg;
682
683 if (!sendmail %mail) {
684 print $out "build: ERROR sending mail to $email_events\n";
685 print $out "'$Mail::Sendmail::error'\n";
686 }
687 }
688}
689
690
[1454]691sub parse_args {
692 my ($argref) = @_;
693
694 if (!parsargv::parse($argref,
695 'optionfile/.*/', \$optionfile,
[12003]696 'indextype/^(mg|mgpp|lucene)$/mg', \$indextype,
[1454]697 'append', \$append,
[12003]698 'manifest', \$manifest,
[1454]699 'remove_archives', \$remove_archives,
700 'remove_import', \$remove_import,
701 'buildtype/^(build|import)$/import', \$buildtype,
702 'maxdocs/^\-?\d+/-1', \$maxdocs,
703 'download/.+', \@download,
704 'collectdir/.*/', \$collectdir,
[14925]705 'site/.*/', \$site,
[1454]706 'dontinstall', \$dontinstall,
707 'save_archives', \$save_archives,
[1762]708 'out/.*/STDERR', \$out,
[2566]709 'make_writable', \$make_writable,
[1762]710 'log_events', \$log_events,
711 'event_log_file/.*/', \$event_log_file,
712 'email_events/.*/', \$email_events,
713 'mail_server/.*/', \$mail_server,
[2785]714 'statsfile/.*/STDERR', \$statsfile,
[1762]715 'event_header/.*/', \$event_header)) {
[1454]716
717 &print_usage();
718 die "\n";
719 }
720}
[2566]721
722
723END {
724
725 if ($make_writable) {
726 # chmod a+rw new collection
727 my $installed_collection = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection);
728 &recursive_chmod($installed_collection);
729
730 # chmod a+rw anything we've left laying about in the tmp directory
731 if (($collectdir ne "") &&
732 (!&util::filenames_equal ($collectdir, &util::filename_cat($ENV{'GSDLHOME'}, "collect")))) {
733 &recursive_chmod($collectdir);
734 }
735 }
736
[2774]737 # this will produce a .final file if one doesn't exist yet - that
738 # should only happen if there's been an error somewhere in the perl
739 # code
740 &final_out(7);
741
[2566]742 sub recursive_chmod {
743 my ($dir) = @_;
744 return unless -d $dir;
745
746 chmod (0777, $dir);
747
748 opendir (DIR, $dir) || die;
749 my @files = readdir DIR;
750 closedir DIR;
751
752 foreach my $file (@files) {
753 next if $file =~ /^\.\.?$/;
754 $file = &util::filename_cat($dir, $file);
755 if (-d $file) {
756 &recursive_chmod ($file);
757 } else {
758 chmod (0777, $file);
759 }
760 }
761 }
762}
Note: See TracBrowser for help on using the repository browser.