source: main/trunk/greenstone2/bin/script/build@ 29380

Last change on this file since 29380 was 29380, checked in by kjdon, 10 years ago

need the collection name in the path as collectdir is just the custom collect directory

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 26.7 KB
RevLine 
[1778]1#!/usr/bin/perl -w
[1184]2
[1778]3###########################################################################
4#
5# build --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2000 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
[1198]28# This perl script may be called directly or by running build.bat on
29# windows (build.bat is in bin\windows)
[1184]30
[1762]31# Note that this script has grown over time and now has many options for
32# use when called from within the collector. If it appears to
33# over-complicate things a little, that's why.
34
[23311]35# I think that currently (Nov 2010) its only officially used from the collector
36# and depositor
37
[1454]38package build;
39
[17253]40use strict;
41no strict 'refs';
42
[1438]43use FileHandle;
[1454]44use File::Copy;
[1438]45
[1184]46BEGIN {
[1198]47
48 die "GSDLHOME not set - did you remember to source setup.bash (unix) or " .
49 "run setup.bat (windows)?\n" unless defined $ENV{'GSDLHOME'};
50 die "GSDLOS not set - did you remember to source setup.bash (unix) or " .
51 "run setup.bat (windows)?\n" unless defined $ENV{'GSDLOS'};
[1184]52 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
[1438]53
54 STDOUT->autoflush(1);
55 STDERR->autoflush(1);
[1184]56}
57
[1778]58use lib qq($ENV{'GSDLHOME'}/perllib/cpan);
59use Mail::Sendmail;
[1184]60use parsargv;
61use util;
[29371]62use FileUtils;
[1454]63use cfgread;
[22589]64use colcfg;
65use dbutil;
[1184]66
[23313]67# set up greenstone environment
68&util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
69# is the following needed with the previous line??
[1678]70# set up path - this allows for paths not to be supplied to system calls
71# and overcomes problems when GSDLHOME contains spaces (double quoting
72# the call doesn't work on win2k and probably other variants of winnt)
73my $path_separator = ":";
74$path_separator = ";" if $ENV{'GSDLOS'} =~ /^windows$/;
[29371]75$ENV{'PATH'} = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}) .
76 $path_separator . &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", "script") .
[1678]77 $path_separator . $ENV{'PATH'};
78
[17253]79# all the input option variables
[22589]80my ($optionfile, $indextype, $append, $manifest, $remove_archives, $remove_import, $buildtype, $infodbtype, $maxdocs, @download, $collectdir, $site, $dontinstall, $save_archives, $out, $make_writable, $log_events, $event_log_file, $email_events, $mail_server, $statsfile, $event_header);
[17253]81
[1454]82&parse_args (\@ARGV);
[1198]83
84my ($collection) = @ARGV;
85
86if (!defined $collection || $collection !~ /\w/) {
87 print STDERR "You must specify a collection to build\n";
88 &print_usage();
89 die "\n";
90}
91
[1454]92if ($optionfile =~ /\w/) {
93 open (OPTIONS, $optionfile) || die "Couldn't open $optionfile\n";
94 my $line = [];
95 my $options = [];
96 while (defined ($line = &cfgread::read_cfg_line ('build::OPTIONS'))) {
97 push (@$options, @$line);
98 }
99 close OPTIONS;
100 &parse_args ($options);
101}
102
[1198]103if ($maxdocs == -1) {
104 $maxdocs = "";
105} else {
106 $maxdocs = "-maxdocs $maxdocs";
107}
108
[1454]109my $cdir = $collectdir;
[22589]110my $gs_mode;
[17253]111if (defined $site && $site =~ /\w/)
[14925]112{
113 die "GSDL3HOME not set." unless $ENV{'GSDL3HOME'};
[29371]114 $cdir = &FileUtils::filenameConcatenate ($ENV{'GSDL3HOME'}, "sites", $site, "collect") unless $collectdir =~ /\w/;
[22589]115 $gs_mode = "gs3";
[14925]116}
117else
118{
[29371]119 $cdir = &FileUtils::filenameConcatenate ($ENV{'GSDLHOME'}, "collect") unless $collectdir =~ /\w/;
[22589]120 $gs_mode = "gs2";
[14925]121}
122
[29371]123my $importdir = &FileUtils::filenameConcatenate ($cdir, $collection, "import");
124my $archivedir = &FileUtils::filenameConcatenate ($cdir, $collection, "archives");
125my $buildingdir = &FileUtils::filenameConcatenate ($cdir, $collection, "building");
126my $indexdir = &FileUtils::filenameConcatenate ($cdir, $collection, "index");
127my $etcdir = &FileUtils::filenameConcatenate ($cdir, $collection, "etc");
128my $bindir = &FileUtils::filenameConcatenate ($ENV{'GSDLHOME'}, "bin");
[1198]129
[1431]130my $use_out = 0;
[1424]131my $outfile = $out;
132if ($out !~ /^(STDERR|STDOUT)$/i) {
133 open (OUT, ">$out") || die "Couldn't open output file $out\n";
134 $out = "OUT";
[1452]135
136 # delete any existing .final file
[29371]137 &FileUtils::removeFiles ("$outfile.final") if -e "$outfile.final";
[1452]138
[1431]139 $use_out = 1;
[1424]140}
141$out->autoflush(1);
142
[1454]143# delete any .kill file left laying around from a previously aborted build
[29371]144if (-e &FileUtils::filenameConcatenate ($cdir, $collection, ".kill")) {
145 &FileUtils::removeFiles (&FileUtils::filenameConcatenate ($cdir, $collection, ".kill"));
[1454]146}
147
[1762]148# get maintainer email address from main.cfg
149my $maintainer = "NULL";
[29371]150my $main_cfg = &FileUtils::filenameConcatenate ($ENV{'GSDLHOME'}, "etc", "main.cfg");
[1762]151my $cfgdata = &cfgread::read_cfg_file ($main_cfg, "maintainer");
152if (defined $cfgdata->{'maintainer'} && $cfgdata->{'maintainer'} =~ /\w/) {
153 $maintainer = $cfgdata->{'maintainer'};
154}
155# if maintainer is "NULL" email_events should be disabled
156if ($maintainer =~ /^NULL$/i) {
157 $email_events = "";
158}
159
[1184]160&main();
161
[2785]162if ($use_out) {
163 close OUT;
[1424]164
[2785]165 # if we've created a build log we'll copy it to the collection's etc
166 # directory
[22589]167 my ($final_etcdir);
[2785]168 if ($dontinstall) {
[29380]169 $final_etcdir = &FileUtils::filenameConcatenate($collectdir, $collection, "etc", "build.log");
[2785]170 } else {
[29371]171 $final_etcdir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect", $collection, "etc", "build.log");
[2785]172 }
[29380]173
[29371]174 &FileUtils::copyFiles($outfile, $final_etcdir);
[2785]175}
176
[1184]177sub print_usage {
[2359]178 print STDOUT "\n";
179 print STDOUT "build: Builds a Greenstone collection (i.e. runs import.pl and buildcol.pl\n";
180 print STDOUT " then copies the resulting indexes to the correct place).\n\n";
181 print STDOUT " usage: $0 [options] collection-name\n\n";
182 print STDOUT " options:\n";
183 print STDOUT " -optionfile file Get options from file, useful on systems where\n";
184 print STDOUT " long command lines may cause problems\n";
[12003]185 print STDOUT " -indextype mg|mgpp|lucene \n";
186 print STDERR " Specify the type of indexer used in this collection\n";
187 print STDERR " If -append is used then -indextype is needed to \n";
188 print STDERR " determine how to run buildcol.pl as well as update\n";
189 print STDERR " 'building' and 'index' according.\n";
[2359]190 print STDOUT " -append Add new files to existing collection\n";
[12003]191 print STDOUT " -manifest Use manifest.xml file to determine which files to process.\n";
[2359]192 print STDOUT " -remove_archives Remove archives directory after successfully\n";
193 print STDOUT " building the collection.\n";
194 print STDOUT " -remove_import Remove import directory after successfully\n";
195 print STDOUT " importing the collection.\n";
196 print STDOUT " -buildtype build|import If 'build' attempt to build directly\n";
197 print STDOUT " from archives directory (bypassing import\n";
198 print STDOUT " stage). Defaults to 'import'\n";
199 print STDOUT " -maxdocs number Maximum number of documents to build\n";
200 print STDOUT " -download directory Directory (or file) to get import documents from.\n";
201 print STDOUT " There may be multiple download directories and they\n";
202 print STDOUT " may be of type http://, ftp://, or file://\n";
203 print STDOUT " Note that any existing import directory will be\n";
204 print STDOUT " deleted to make way for the downloaded data if\n";
205 print STDOUT " a -download option is supplied\n";
206 print STDOUT " -collectdir directory Collection directory (defaults to " .
[29371]207 &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}). "collect for Greenstone2;\n";
[14925]208 print STDOUT" for Greenstone3 use -site option and then collectdir default will be\n";
209 print STDOUT " set to the collect folder within that site.)\n";
210 print STDOUT " -site Specify the site within a Greenstone3 installation to use.\n";
[2359]211 print STDOUT " -dontinstall Only applicable if -collectdir is set to something\n";
212 print STDOUT " other than the default. -dontinstall will suppress\n";
213 print STDOUT " the default behaviour which is to install the\n";
214 print STDOUT " collection to the gsdl/collect directory once it has\n";
215 print STDOUT " been built.\n";
216 print STDOUT " -save_archives Create a copy of the existing archives directory\n";
217 print STDOUT " called archives.org\n";
218 print STDOUT " -out Filename or handle to print output status to.\n";
219 print STDOUT " The default is STDERR\n";
[2785]220 print STDOUT " -statsfile name Filename or handle to print import statistics to.\n";
221 print STDOUT " The default is STDERR\n";
[2566]222 print STDOUT " -make_writable If set build will make the collection and any\n";
223 print STDOUT " temporary files it created globally writable after\n";
224 print STDOUT " it finishes\n";
[2359]225 print STDOUT " -log_events Log important events (collection built successfully\n";
226 print STDOUT " etc.) to event_log_file\n";
227 print STDOUT " -event_log_file file File to append important events to (defaults to\n";
[29371]228 print STDOUT " " . &FileUtils::filenameConcatenate ($ENV{'GSDLHOME'}, "etc", "events.txt") . "\n";
[2359]229 print STDOUT " -email_events addr Comma separated list of email addresses to mail\n";
230 print STDOUT " details of important collection building events\n";
231 print STDOUT " -mail_server server The outgoing (SMTP) mail server to be used by\n";
232 print STDOUT " email_events. email_events will be disabled if\n";
233 print STDOUT " mail_server isn't set\n";
234 print STDOUT " -event_header file File containing a header to go on any event\n";
235 print STDOUT " messages. If not specified build will create a\n";
236 print STDOUT " generic header\n\n";
[2469]237 print STDOUT " [Type \"build | more\" if this help text scrolled off your screen]";
[2359]238 print STDOUT "\n" unless $ENV{'GSDLOS'} =~ /^windows$/i;
[1184]239}
240
241sub main {
[1454]242
243 if ($save_archives && -d $archivedir) {
244 print $out "caching original archives to ${archivedir}.org\n";
[29371]245 &FileUtils::copyFilesRecursive ($archivedir, "${archivedir}.org");
[1454]246 }
247
[1424]248 # do the download thing if we have any -download options
249 if (scalar (@download)) {
250 # remove any existing import data
[1431]251 if (&has_content ($importdir)) {
[23311]252 #print $out "build: WARNING: removing contents of $importdir\n";
[29371]253 #&FileUtils::removeFilesRecursive ($importdir);
[1431]254 }
[1454]255
[17253]256 foreach my $download_dir (@download) {
[1507]257
258 # remove any leading or trailing whitespace from filenames (just in case)
259 $download_dir =~ s/^\s+//;
260 $download_dir =~ s/\s+$//;
[1424]261
[1709]262 if ($download_dir =~ /^(http|ftp):\/\//) {
263 # use wget to mirror http or ftp urls
264 # options used are:
265 # -P = the directory to download documents to
266 # -np = don't ascend to parent directories. this means that only documents
267 # that live in the same directory or below on the same server as
268 # the given url will be downloaded
269 # -nv = not too verbose
270 # -r = recursively mirror
271 # -N = use time-stamping to see if an up-to-date local copy of each
272 # file already exists. this may be useful if wget fails and
273 # is restarted
274 # -l inf = infinite recursion depth
275 # -R "*\?*" = don't download cgi based urls
276 # -o = the output file to write download status to (only used if the -out
277 # option was given to build)
[1743]278
[24362]279 my $download_cmd = "\"".&util::get_perl_exec()."\" -S gsWget.pl -P \"$importdir\" -np -nv";
[1753]280 $download_cmd .= " -r -N -l inf -R \"*\\?*\"";
[1709]281 $download_cmd .= " -o \"$outfile.download\"" if $use_out;
282 $download_cmd .= " \"$download_dir\"";
283 system ($download_cmd);
[1424]284
[1709]285 # note that wget obeys the robot rules. this means that it will have
286 # downloaded a robots.txt file if one was present. since it's unlikely
287 # anyone really wants to include it in a collection we'll delete it.
288 # robots.txt shouldn't be more than two directories deep (I think it will
289 # always be exactly two deep but will look for it in the top directory too)
290 # so that's as far as we'll go looking for it.
291 if (opendir (DIR, $importdir)) {
292 my @files = readdir DIR;
293 closedir DIR;
294 foreach my $file (@files) {
295 next if $file =~ /^\.\.?$/;
296 if ($file =~ /^robots.txt$/i) {
[29371]297 &FileUtils::removeFiles (&FileUtils::filenameConcatenate ($importdir, $file));
[1709]298 last;
299 } else {
[29371]300 $file = &FileUtils::filenameConcatenate ($importdir, $file);
[1709]301 if (-d $file) {
302 if (opendir (DIR, $file)) {
[1743]303 my @files2 = readdir DIR;
[1709]304 closedir DIR;
[1743]305 foreach my $file2 (@files2) {
306 if ($file2 =~ /^robots.txt$/i) {
[29371]307 &FileUtils::removeFiles (&FileUtils::filenameConcatenate ($file, $file2));
[1709]308 last;
309 }
310 }
311 }
312 }
313 }
314 }
315 }
[1424]316
[1709]317 # if using output directory append the file download output to it
318 &append_file ($out, "$outfile.download");
319
[1424]320 } else {
321 # we assume anything not beginning with http:// or ftp://
322 # is a file or directory on the local file system.
[1485]323 $download_dir =~ s/^file:(\/\/)?//;
[1507]324 $download_dir =~ s/^\s+//; # may be whitespace between "file://" and the rest
[1424]325
326 if (-e $download_dir) {
327 # copy download_dir and all it contains to the import directory
[24192]328 #Config{perlpath}, like $^X, is a special variable containing the full path to the current perl executable we are in
[24362]329 my $download_cmd = "\"".&util::get_perl_exec()."\" -S filecopy.pl";
[1454]330 $download_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
[14925]331 $download_cmd .= " -site \"$site\"" if $site =~ /\w/;
[1431]332 $download_cmd .= " -out \"$outfile.download\"" if $use_out;
333 $download_cmd .= " \"" . $download_dir . "\" " . $collection;
[14925]334
335 my $download_status = system ($download_cmd);
336 if ($download_status > 0)
337 {
338 die "Failed to execute: $download_cmd\n";
339 }
340
341
[1431]342 # if using output directory append the file download output to it
343 &append_file ($out, "$outfile.download");
[1424]344 } else {
[1507]345 print $out "WARNING: '$download_dir' does not exist\n";
[1424]346 }
347 }
348 }
349 }
350
[14925]351 `echo $importdir ; ls $importdir `;
352
[22589]353 my $col_cfg_file;
354 if ($gs_mode eq "gs3") {
[29371]355 $col_cfg_file = &FileUtils::filenameConcatenate($etcdir, "collectionConfig.xml");
[22589]356 } else {
[29371]357 $col_cfg_file = &FileUtils::filenameConcatenate($etcdir, "collect.cfg");
[22589]358 }
359
360 my $collect_cfg = &colcfg::read_collection_cfg ($col_cfg_file, $gs_mode);
361 # get the database type for this collection from its configuration file (may be undefined)
362 $infodbtype = $collect_cfg->{'infodbtype'} || &dbutil::get_default_infodb_type();
363
364 my $archiveinf_doc_file_path = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
[21567]365 if (-e $archiveinf_doc_file_path) {
[1198]366 if (&has_content ($importdir)) {
[1184]367 if ($buildtype eq "build") {
368 &gsdl_build();
369 } else {
370 &gsdl_import();
371 &gsdl_build();
372 }
373 } else {
374 # there are archives but no import, build directly from archives
[1424]375 print $out "build: no import material was found, building directly\n";
376 print $out " from archives\n";
[1184]377 &gsdl_build();
378 }
379 } else {
[1198]380 if (&has_content ($importdir)) {
[1184]381 if ($buildtype eq "build") {
[1424]382 print $out "build: can't build directly from archives as no\n";
383 print $out " imported archives exist (did you forget to\n";
384 print $out " move the contents of $collection/import to\n";
385 print $out " collection/archives?)\n";
[1184]386 }
387 &gsdl_import();
[21567]388 if (&has_content ($archivedir, "^archiveinf-doc\..*\$")) {
[2381]389 &gsdl_build();
390 } else {
391 my $msg = "build: ERROR: The collection could not be built as no\n";
392 $msg .= " valid data was imported. Are at least some of\n";
393 $msg .= " the files you imported in a format that can be\n";
394 $msg .= " processed by the specified Greenstone plugins?\n";
395 print $out $msg;
396 &log_event ($msg);
397 &final_out (6) if $use_out;
398 die "\n";
399 }
[1184]400 } else {
401 # no import or archives
[2381]402 my $msg = "build: ERROR: The collection could not be built as it contains no data.\n";
[1762]403 print $out $msg;
404 &log_event ($msg);
[1438]405 &final_out (1) if $use_out;
[1184]406 die "\n";
407 }
408 }
[1454]409
410 if ($collectdir ne "" && !$dontinstall) {
[14925]411
412 my $install_collectdir;
413 if (defined $ENV{'GSDL3HOME'})
414 {
415
416 if ((defined $site) && ($site ne ""))
417 {
[29371]418 $install_collectdir = &FileUtils::filenameConcatenate ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
[14925]419 }
420 else
421 {
422 my $msg = "build: ERROR: Need to specify the site within the Greenstone3 installation.";
423 print $out $msg;
424 &log_event ($msg);
425 &final_out (6) if $use_out;
426 die "\n";
427 }
428 }
429 else
430 {
[29371]431 $install_collectdir = &FileUtils::filenameConcatenate ($ENV{'GSDLHOME'}, "collect");
[14925]432 }
433
[1454]434 if (!&util::filenames_equal ($collectdir, $install_collectdir)) {
435
436 # install collection to gsdl/collect
437 print $out "installing the $collection collection\n";
[29371]438 my $newdir = &FileUtils::filenameConcatenate ($install_collectdir, $collection);
439 my $olddir = &FileUtils::filenameConcatenate ($collectdir, $collection);
[1454]440 if (-d $newdir) {
[1762]441 my $msg = "build: Could not install collection as $newdir\n" .
442 " already exists. Collection will remain at\n$olddir\n";
443
444 print $out $msg;
445 &log_event ($msg);
[1454]446 &final_out (4) if $use_out;
447 die "\n";
448 }
449 if (!&File::Copy::move ($olddir, $newdir)) {
[1762]450 my $msg = "build: Failed to install collection to $newdir\n" .
451 " Collection will remain at $olddir\n";
452 print $out $msg;
453 &log_event ($msg);
[1454]454 &final_out (5) if $use_out;
455 die "\n";
456 }
457 }
458 }
459
[1762]460 &log_event ("The $collection collection was built successfully\n");
[1438]461 &final_out (0) if $use_out;
[1184]462}
463
464sub gsdl_import {
465
[1424]466 print $out "importing the $collection collection\n\n";
[1198]467
[24192]468 #Config{perlpath}, like $^X, is a special variable containing the full path to the current perl executable we are in
[24362]469 my $import_cmd = "\"".&util::get_perl_exec()."\" -S import.pl";
[1431]470 $import_cmd .= " -out \"$outfile.import\"" if $use_out;
[12003]471 if ($append) {
472 $import_cmd .= " -keepold";
[13067]473 if (not $manifest) {
474 # if we are appending, with no manifest, assume incremental
475 $import_cmd .= " -incremental";
476 }
[12003]477 } else {
478 $import_cmd .= " -removeold";
479 }
480
481 $import_cmd .= " -manifest manifest.xml" if ($manifest);
[14925]482 $import_cmd .= " -site \"$site\"" if $site =~ /\w/;
[1454]483 $import_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
[2785]484 $import_cmd .= " -statsfile \"$statsfile\"" if $statsfile =~ /\w/;
[1431]485 $import_cmd .= " $maxdocs $collection";
[2785]486
[1431]487 system ($import_cmd);
488 # if using output directory append the import output to it
489 &append_file ($out, "$outfile.import");
490
[22589]491 my $archiveinf_doc_file_path = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
492
[21567]493 if (-e $archiveinf_doc_file_path) {
[1424]494 print $out "$collection collection imported successfully\n\n";
495 if ($remove_import) {
496 print $out "removing import directory ($importdir)\n";
[29371]497 &FileUtils::removeFilesRecursive ($importdir);
[1424]498 }
[1184]499 } else {
[22589]500 print $out "$archiveinf_doc_file_path not found. archives contents:\n";
[19485]501 print $out `ls $archivedir`;
502
[1762]503 my $msg = "build: ERROR: import.pl failed\n";
504 print $out "\n$msg";
505 &log_event ($msg);
[1438]506 &final_out (2) if $use_out;
[1454]507 die "\n";
[1184]508 }
509}
510
511sub gsdl_build {
512
[1424]513 print $out "building the $collection collection\n\n";
[1184]514
[24192]515 #Config{perlpath}, like $^X, is a special variable containing the full path to the current perl executable we are in
[24362]516 my $build_cmd = "\"".&util::get_perl_exec()."\" -S buildcol.pl";
[12003]517
518 my $removeold = 1;
519 if ($append) {
520 if ($indextype eq "lucene") {
[13067]521 $build_cmd .= " -keepold -incremental -builddir $indexdir";
[12003]522 $removeold = 0;
523 }
524 else {
525 $build_cmd .= " -removeold";
526 }
527 }
528 else {
529 $build_cmd .= " -removeold";
530 }
531
[1431]532 $build_cmd .= " -out \"$outfile.build\"" if $use_out;
[14925]533 $build_cmd .= " -site \"$site\"" if $site =~ /\w/;
[1454]534 $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
[1431]535 $build_cmd .= " $maxdocs $collection";
536 system ($build_cmd);
537 # if using output directory append the buildcol output to it
538 &append_file ($out, "$outfile.build");
539
[19485]540 my @db_exts = ( ".ldb", ".bdb", ".gdb", ".db" );
541 my $build_ok = 0;
542 foreach my $db_ext (@db_exts) {
[29371]543 if ($removeold && (-e &FileUtils::filenameConcatenate ($buildingdir, "text", "$collection$db_ext"))) {
[19485]544 $build_ok = 1;
545 last;
546 }
[29371]547 if (($removeold==0) && (-e &FileUtils::filenameConcatenate ($indexdir, "text", "$collection$db_ext"))) {
[19485]548 $build_ok = 1;
549 last;
550 }
551 }
552
553 if ($build_ok) {
[1424]554 print $out "$collection collection built successfully\n\n";
555 if ($remove_archives) {
556 print $out "removing archives directory ($archivedir)\n";
[29371]557 &FileUtils::removeFilesRecursive ($archivedir);
[1424]558 }
[1184]559 } else {
[1762]560 my $msg = "build: ERROR: buildcol.pl failed\n";
561 print $out "\n$msg";
562 &log_event ($msg);
[1454]563 &final_out (3) if $use_out;
564 die "\n";
[1184]565 }
566
[12003]567 if ($removeold) {
568 # replace old indexes with new ones
569 if (&has_content ($indexdir)) {
570 print $out "removing old indexes\n";
[29371]571 &FileUtils::removeFilesRecursive ($indexdir);
[12003]572 }
573 rmdir ($indexdir) if -d $indexdir;
574 &File::Copy::move ($buildingdir, $indexdir);
[1184]575 }
[12003]576 else {
[13067]577 # Do nothing. We have built into index dir rather than building dir
[12003]578 }
[1454]579
[13067]580 # remove the cached archives
[1461]581 if ($save_archives && -d "${archivedir}.org") {
[29371]582 &FileUtils::removeFilesRecursive ("${archivedir}.org");
[1454]583 }
[1184]584}
585
[2381]586# return 1 if $dir directory contains any files or sub-directories (other
587# than those specified in the $ignore regular expression)
[1184]588sub has_content {
[2381]589 my ($dir, $ignore) = @_;
[1184]590
591 if (!-d $dir) {return 0;}
[2381]592
[1184]593 opendir (DIR, $dir) || return 0;
594 my @files = readdir DIR;
[2488]595 closedir DIR;
[1184]596
597 foreach my $file (@files) {
598 if ($file !~ /^\.{1,2}$/) {
[2381]599 return 1 unless (defined $ignore && $file =~ /$ignore/);
[1184]600 }
601 }
[14925]602
[1184]603 return 0;
604}
[1431]605
606sub append_file {
607 my ($handle, $file) = @_;
608
609 open (FILE, $file) || return;
610 undef $/;
611 print $handle <FILE>;
612 $/ = "\n";
613 close FILE;
[29371]614 &FileUtils::removeFiles ($file);
[1431]615}
[1438]616
[2774]617# creates a file called $outfile.final and writes an output code to it.
[1438]618# An output code of 0 specifies that there was no error
619sub final_out {
620 my ($exit_code) = @_;
621
[2774]622 if ($use_out && (!-e "$outfile.final")) {
623
624 if (open (FINAL, ">$outfile.final")) {
625 print FINAL $exit_code;
626 close FINAL;
627 }
[1438]628 }
629}
[1454]630
[1762]631sub log_event {
632 my ($msg) = @_;
633
634 return unless ($log_events || $email_events);
635
636 # get the event header
[1778]637 my $eheader = "[Build Event]\n";
638 $eheader .= "Date: " . scalar localtime() . "\n";
[1762]639 if ($event_header ne "" && open (HEADER, $event_header)) {
640 undef $/;
[1778]641 $eheader .= <HEADER>;
[1762]642 $/ = "\n";
643 close HEADER;
644 } else {
645 $eheader .= "Collection: $collection\n";
[1778]646 $eheader .= "GSDLHOME: $ENV{'GSDLHOME'}\n";
647 $eheader .= "Build Location: $collectdir\n";
[1762]648 }
649
650 if ($log_events) {
651 my $fail = 0;
652 # append the event to the event log file
653 if ($event_log_file eq "" || !open (LOG, ">>$event_log_file")) {
654 # log file defaults to $GSDLHOME/etc/events.txt
[29371]655 $event_log_file = &FileUtils::filenameConcatenate ($ENV{'GSDLHOME'}, "etc", "events.txt");
[1762]656 if (!open (LOG, ">>$event_log_file")) {
657 print $out "build: ERROR: Couldn't open event log file $event_log_file\n";
658 $fail = 1;
659 }
660 }
661 if (!$fail) {
662 print LOG $eheader;
663 print LOG $msg;
[1778]664 print LOG "\n";
[1762]665 close LOG;
666 }
667 }
668
669 if ($email_events) {
670 # if mail_server isn't set email_events does nothing
671 if ($mail_server eq "") {
672 print $out "build: WARNING: mail_server was not set - email_events option was ignored\n";
673 return;
674 }
675
676 my %mail = ('SMTP' => $mail_server,
677 'To' => $email_events,
678 'From' => $maintainer,
679 'Subject' => 'Greenstone Build Event'
680 );
681 $mail{'Message'} = $eheader . $msg;
682
683 if (!sendmail %mail) {
684 print $out "build: ERROR sending mail to $email_events\n";
685 print $out "'$Mail::Sendmail::error'\n";
686 }
687 }
688}
689
690
[1454]691sub parse_args {
692 my ($argref) = @_;
693
694 if (!parsargv::parse($argref,
695 'optionfile/.*/', \$optionfile,
[12003]696 'indextype/^(mg|mgpp|lucene)$/mg', \$indextype,
[1454]697 'append', \$append,
[12003]698 'manifest', \$manifest,
[1454]699 'remove_archives', \$remove_archives,
700 'remove_import', \$remove_import,
701 'buildtype/^(build|import)$/import', \$buildtype,
702 'maxdocs/^\-?\d+/-1', \$maxdocs,
703 'download/.+', \@download,
704 'collectdir/.*/', \$collectdir,
[14925]705 'site/.*/', \$site,
[1454]706 'dontinstall', \$dontinstall,
707 'save_archives', \$save_archives,
[1762]708 'out/.*/STDERR', \$out,
[2566]709 'make_writable', \$make_writable,
[1762]710 'log_events', \$log_events,
711 'event_log_file/.*/', \$event_log_file,
712 'email_events/.*/', \$email_events,
713 'mail_server/.*/', \$mail_server,
[2785]714 'statsfile/.*/STDERR', \$statsfile,
[1762]715 'event_header/.*/', \$event_header)) {
[1454]716
717 &print_usage();
718 die "\n";
719 }
720}
[2566]721
722
723END {
724
725 if ($make_writable) {
726 # chmod a+rw new collection
[29371]727 my $installed_collection = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect", $collection);
[2566]728 &recursive_chmod($installed_collection);
729
730 # chmod a+rw anything we've left laying about in the tmp directory
731 if (($collectdir ne "") &&
[29371]732 (!&util::filenames_equal ($collectdir, &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect")))) {
[2566]733 &recursive_chmod($collectdir);
734 }
735 }
736
[2774]737 # this will produce a .final file if one doesn't exist yet - that
738 # should only happen if there's been an error somewhere in the perl
739 # code
740 &final_out(7);
741
[2566]742 sub recursive_chmod {
743 my ($dir) = @_;
744 return unless -d $dir;
745
746 chmod (0777, $dir);
747
748 opendir (DIR, $dir) || die;
749 my @files = readdir DIR;
750 closedir DIR;
751
752 foreach my $file (@files) {
753 next if $file =~ /^\.\.?$/;
[29371]754 $file = &FileUtils::filenameConcatenate($dir, $file);
[2566]755 if (-d $file) {
756 &recursive_chmod ($file);
757 } else {
758 chmod (0777, $file);
759 }
760 }
761 }
762}
Note: See TracBrowser for help on using the repository browser.