source: main/trunk/greenstone2/bin/script/buildcol.pl@ 26567

Last change on this file since 26567 was 26567, checked in by ak19, 11 years ago

When a GS2 collection contains both collect.cfg and collectionConfig.xml (as the advanced beatles collection does), the old code used to end up reading in the GS3 collectionConfig.xml instead of the GS2 collect.cfg, and set the gs_mode to GS3. Now colcfg::get_collect_cfg_name takes the gs_mode as a parameter (instead of determining it and returning it) and works out the collect cfg file name for that gs_mode. That means the calling functions now need to work out the gs_mode themselves. They do so by setting the gs_mode to gs3 if the site flag is present on the command line; otherwise it defaults to gs2. So from now on, the site flag must be specified for GS3 collections.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 23.4 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will build a particular collection.
30
31package buildcol;
32
# Compile-time setup: checks the required Greenstone environment variables
# and prepends the Greenstone (and extension) perl library directories to
# @INC so that the 'use' statements following this block can be resolved.
#
# Dies if GSDLHOME or GSDLOS is not set.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Each unshift puts its directory in front of the previous ones, so the
    # directory added last is searched first. The order is kept as it was.
    foreach my $subdir ("perllib", "perllib/cpan", "perllib/cpan/XML/XPath",
                        "perllib/plugins", "perllib/classify") {
        unshift (@INC, "$ENV{'GSDLHOME'}/$subdir");
    }

    if (defined $ENV{'GSDL-RUN-SETUP'}) {
        require util;
        &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
    }

    # Prepends the perllib directories of every extension named in the
    # colon-separated list $ext_names, found under "$ext_root/ext/<name>".
    my $add_extension_paths = sub {
        my ($ext_root, $ext_names) = @_;

        foreach my $e (split(/:/, $ext_names)) {
            my $ext_prefix = "$ext_root/ext/$e";

            foreach my $subdir ("perllib", "perllib/cpan",
                                "perllib/plugins", "perllib/classify") {
                unshift (@INC, "$ext_prefix/$subdir");
            }
        }
    };

    if (defined $ENV{'GSDLEXTS'}) {
        $add_extension_paths->($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }

    if (defined $ENV{'GSDL3EXTS'}) {
        if (defined $ENV{'GSDL3SRCHOME'}) {
            $add_extension_paths->($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
        }
        else {
            # Without GSDL3SRCHOME the extension paths would resolve to
            # "/ext/..." at the filesystem root, which is never intended.
            warn "GSDL3EXTS is set but GSDL3SRCHOME is not; ignoring GS3 extensions\n";
        }
    }
}
71
72use colcfg;
73use dbutil;
74use util;
75use scriptutil;
76use FileHandle;
77use gsprintf;
78use printusage;
79use parse2;
80
81use strict;
82no strict 'refs'; # allow filehandles to be variables and vice versa
83no strict 'subs'; # allow barewords (eg STDERR) as function arguments
84
85
# Values accepted by the "mode" enum option. Each entry pairs the value
# name with the resource-bundle key of its description.
my $mode_list = [
    map { +{ 'name' => $_, 'desc' => '{buildcol.mode.' . $_ . '}' } }
        qw(all compress_text build_index infodb)
];
95
# Values accepted by the "sections_index_document_metadata" enum option.
# Each entry pairs the value name with the resource-bundle key of its
# description.
my $sec_index_list = [
    map { +{ 'name' => $_,
             'desc' => '{buildcol.sections_index_document_metadata.' . $_ . '}' } }
        qw(never always unless_section_metadata_exists)
];
104
# Description of every command line option understood by buildcol.pl.
# The table is handed to parse2::parse for parsing and, through $options,
# to the PrintUsage routines. main() copies each parsed value into the
# variable that has the same name as the option.
my $arguments = [
    { name => 'remove_empty_classifications',
      desc => '{buildcol.remove_empty_classifications}',
      type => 'flag', reqd => 'no', modegli => '2' },

    { name => 'archivedir', desc => '{buildcol.archivedir}',
      type => 'string', reqd => 'no', hiddengli => 'yes' },

    { name => 'builddir', desc => '{buildcol.builddir}',
      type => 'string', reqd => 'no', hiddengli => 'yes' },

    # Disabled option, kept for reference:
    # { name => 'cachedir', desc => '{buildcol.cachedir}',
    #   type => 'string', reqd => 'no' },

    # parsearg left "" as default; the default used to be
    # &util::filename_cat ($ENV{'GSDLHOME'}, "collect")
    { name => 'collectdir', desc => '{buildcol.collectdir}',
      type => 'string', reqd => 'no', hiddengli => 'yes' },

    { name => 'site', desc => '{buildcol.site}',
      type => 'string', deft => '', reqd => 'no', hiddengli => 'yes' },

    { name => 'debug', desc => '{buildcol.debug}',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },

    # parsearg left "" as default; the default used to be
    # &util::filename_cat("<collectdir>", "colname", "etc", "fail.log")
    { name => 'faillog', desc => '{buildcol.faillog}',
      type => 'string', reqd => 'no', modegli => '3' },

    { name => 'index', desc => '{buildcol.index}',
      type => 'string', reqd => 'no', modegli => '3' },

    # Note: this entry carries no 'reqd' key.
    { name => 'incremental', desc => '{buildcol.incremental}',
      type => 'flag', hiddengli => 'yes' },

    # ('modegli' => "3" is disabled for keepold and removeold)
    { name => 'keepold', desc => '{buildcol.keepold}',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },

    { name => 'removeold', desc => '{buildcol.removeold}',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },

    { name => 'language', desc => '{scripts.language}',
      type => 'string', reqd => 'no', modegli => '3' },

    { name => 'maxdocs', desc => '{buildcol.maxdocs}',
      type => 'int', reqd => 'no', hiddengli => 'yes' },

    { name => 'maxnumeric', desc => '{buildcol.maxnumeric}',
      type => 'int', reqd => 'no', deft => '4', range => '4,512',
      modegli => '3' },

    # parsearg left "" as default ('deft' => "all" is disabled)
    { name => 'mode', desc => '{buildcol.mode}',
      type => 'enum', list => $mode_list, reqd => 'no', modegli => '3' },

    { name => 'no_strip_html', desc => '{buildcol.no_strip_html}',
      type => 'flag', reqd => 'no', modegli => '3' },

    { name => 'store_metadata_coverage',
      desc => '{buildcol.store_metadata_coverage}',
      type => 'flag', reqd => 'no', modegli => '3' },

    { name => 'no_text', desc => '{buildcol.no_text}',
      type => 'flag', reqd => 'no', modegli => '2' },

    { name => 'sections_index_document_metadata',
      desc => '{buildcol.sections_index_document_metadata}',
      type => 'enum', list => $sec_index_list, reqd => 'no',
      modegli => '2' },

    { name => 'out', desc => '{buildcol.out}',
      type => 'string', deft => 'STDERR', reqd => 'no',
      hiddengli => 'yes' },

    # parsearg left "" as default ('deft' => "2" is disabled)
    { name => 'verbosity', desc => '{buildcol.verbosity}',
      type => 'int', reqd => 'no', modegli => '3' },

    { name => 'gli', desc => '',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },

    { name => 'xml', desc => '{scripts.xml}',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },

    { name => 'activate', desc => '{buildcol.activate}',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },
];
246
# Script description (name, description key and option table) that is
# passed to the PrintUsage routines.
my $options = {
    name => "buildcol.pl",
    desc => "{buildcol.desc}",
    args => $arguments,
};


# globals
# ($out is filled in from the parsed "out" option by main(); $collection
# and $configfilename are assigned in main())
my ($collection, $configfilename, $out);

# used to signify "gs2"(default) or "gs3"
my $gs_mode = "gs2";
259
## @method gsprintf()
# Print a string to the screen after looking it up from a locale dependent
# strings file. This function is loosely based on the idea of resource
# bundles as used in Java.
#
# This is a thin wrapper: all arguments are passed on unchanged to
# gsprintf::gsprintf(), and its return value is returned.
#
# As called from this file, the arguments are:
# @param $handle The output stream to print to (e.g. STDERR or $out).
# @param $text The string containing GS keys that should be replaced with
#              their locale dependent equivalents.
# @param ... Any further values to substitute into the text
#            (presumably printf-style - confirm against gsprintf.pm).
# @return Whatever gsprintf::gsprintf() returns.
#
# Note: the sub is deliberately declared without a prototype. It used to be
# declared as 'sub gsprintf()', i.e. "takes no arguments", which only worked
# because every caller used the '&' sigil to bypass the prototype.
#
sub gsprintf
{
    return &gsprintf::gsprintf(@_);
}
## gsprintf() ##
276
&main();

## @method main()
#
# Parses and validates the command line arguments, merges them with the
# settings found in the collection configuration file, then creates the
# appropriate builder object(s) and runs the requested build passes.
#
# @note Added true incremental support - John Thompson, DL Consulting Ltd.
# @note There were several bugs regarding using directories other than
#       "import" or "archives" during import and build quashed. - John
#       Thompson, DL Consulting Ltd.
#
# Takes no parameters: all input comes from @ARGV, %ENV and the collection
# configuration file. In outline:
#   1. parse the options and copy each value into the variable of the same
#      name
#   2. open the output stream and the fail log
#   3. read the collection configuration and let it fill in any setting
#      that was not given on the command line
#   4. work out the archives/building directories and the builder class(es)
#   5. construct and initialise the builder(s), then run the passes selected
#      by -mode (compress_text, build_index, infodb, or all of them)
#   6. if -activate was given, run activate.pl on the collection
#
# The -incremental flag is not acted upon here beyond validation; it is
# passed on to the builder constructor(s).
#
# Dies (after printing the usage text) if the arguments are invalid or the
# collection cannot be found, and dies if the output file or the fail log
# cannot be opened. Calls exit(-1) if activate.pl fails.
#
sub main
{
    # Command line args. The parsed options are assigned to these lexicals
    # *by name* (see the string eval below), so every option listed in
    # $arguments needs a variable of the same name, declared either here or
    # at file scope ($out).
    my ($verbosity, $archivedir, $cachedir, $builddir, $site, $maxdocs,
        $debug, $mode, $indexname, $removeold, $keepold,
        $incremental, $incremental_mode,
        $remove_empty_classifications,
        $collectdir, $textindex,
        $no_strip_html, $store_metadata_coverage,
        $no_text, $faillog, $gli, $index, $language,
        $sections_index_document_metadata, $maxnumeric, $activate);

    my $xml = 0;
    my $hashParsingResult = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV, $arguments, $hashParsingResult, "allow_extra_options");

    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # Copy every parsed option into the variable named after it. A string
    # eval is required because the variable name is only known at run time.
    foreach my $strVariable (keys %$hashParsingResult) {
        eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
    }

    # The option is called "index", so the loop above stores it in $index,
    # but the rest of this sub works with $indexname. Without this the
    # -index option would be silently ignored.
    if (defined $index && $index ne "") {
        $indexname = $index;
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8();
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    $textindex = "";
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # Three-argument open: the file name is taken literally, whatever
        # characters it starts or ends with.
        unless (open(OUT, '>', $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        }
        # From here on $out holds the *name* of the handle; it is used as a
        # symbolic filehandle and passed to the builders in that form.
        $out = "buildcol::OUT";
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection
    if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    unless (open(FAILLOG, '>>', $faillog)) {
        &gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog);
        die;
    }
    $faillog = 'buildcol::FAILLOG';
    $faillog->autoflush(1);

    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
    # Don't know why this didn't already happen, but now collection specific
    # classify and plugins directory also added to include path
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/classify");
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/plugins");

    # Read in the collection configuration file.
    my ($collectcfg, $buildtype, $orthogonalbuildtypes);
    if ((defined $site) && ($site ne "")) { # GS3
        $gs_mode = "gs3";
    }
    $configfilename = &colcfg::get_collect_cfg_name($out, $gs_mode);
    $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode);

    # If the infodbtype value wasn't defined in the collect.cfg file, use the default
    if (!defined($collectcfg->{'infodbtype'})) {
        $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
    }

    # For the settings below, a value given on the command line wins over
    # the collection configuration unless noted otherwise.
    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    # we use searchtype for determining buildtype, but for old versions, use buildtype
    if (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    } else {
        $buildtype = "mg"; #mg is the default
    }

    if (defined $collectcfg->{'orthogonalbuildtypes'}) {
        $orthogonalbuildtypes = $collectcfg->{'orthogonalbuildtypes'};
    }

    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    # There is no -cachedir option (it is disabled in $arguments), so
    # $cachedir is normally still undefined at this point.
    if (defined $collectcfg->{'cachedir'} && (!defined $cachedir || $cachedir eq "")) {
        $cachedir = $collectcfg->{'cachedir'};
    }
    if (defined $collectcfg->{'builddir'} && $builddir eq "") {
        $builddir = $collectcfg->{'builddir'};
    }
    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        } else {
            $maxdocs = -1; # the default
        }
    }
    # Here the collection configuration takes precedence over the command line
    if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/) {
        $maxnumeric = $collectcfg->{'maxnumeric'};
    }

    if ($maxnumeric < 4 || $maxnumeric > 512) {
        $maxnumeric = 4;
    }

    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
        if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
            $mode = $collectcfg->{'mode'};
        } else {
            $mode = "all"; # the default
        }
    }
    if (defined $collectcfg->{'index'} && (!defined $indexname || $indexname eq "")) {
        $indexname = $collectcfg->{'index'};
    }
    if (defined $collectcfg->{'no_text'} && $no_text == 0) {
        if ($collectcfg->{'no_text'} =~ /^true$/i) {
            $no_text = 1;
        }
    }
    if (defined $collectcfg->{'no_strip_html'} && $no_strip_html == 0) {
        if ($collectcfg->{'no_strip_html'} =~ /^true$/i) {
            $no_strip_html = 1;
        }
    }
    if (defined $collectcfg->{'store_metadata_coverage'} && $store_metadata_coverage == 0) {
        if ($collectcfg->{'store_metadata_coverage'} =~ /^true$/i) {
            $store_metadata_coverage = 1;
        }
    }
    if (defined $collectcfg->{'remove_empty_classifications'} && $remove_empty_classifications == 0) {
        if ($collectcfg->{'remove_empty_classifications'} =~ /^true$/i) {
            $remove_empty_classifications = 1;
        }
    }

    if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
        $textindex = $collectcfg->{'textcompress'};
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $gli = 1;
    }

    if ($sections_index_document_metadata !~ /\S/ && defined $collectcfg->{'sections_index_document_metadata'}) {
        $sections_index_document_metadata = $collectcfg->{'sections_index_document_metadata'};
    }

    if ($sections_index_document_metadata !~ /^(never|always|unless_section_metadata_exists)$/) {
        $sections_index_document_metadata = "never";
    }

    ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                   $incremental, "building",
                                                   $collectcfg);

    $gli = 0 unless defined $gli;

    # New argument to track whether build is incremental
    $incremental = 0 unless defined $incremental;

    print STDERR "<Build>\n" if $gli;

    #set the text index
    if (($buildtype eq "mgpp") || ($buildtype eq "lucene") || ($buildtype eq "solr")) {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /

    my ($realarchivedir, $realbuilddir);
    # Modified so that the archivedir, if provided as an argument, is made
    # absolute if it isn't already
    if ($archivedir eq "") {
        $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
    }
    else {
        $archivedir = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $archivedir);
    }
    # End Mod
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;

    if ($builddir eq "") {
        $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building");
        if ($incremental) {
            &gsprintf($out, "{buildcol.incremental_default_builddir}\n");
        }
    }
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
        &gsprintf($out, "{buildcol.updating_archive_cache}\n")
            if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: match the collection name literally, even if it contains
        # characters that are special in a regular expression
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);

    my ($buildertype, $builderdir);
    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildertype = "custombuilder";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "custombuilder";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = undef;
        if ($buildtype ne "") {
            # caters for extension-based build types, such as 'solr'
            $buildertype = $buildtype."builder";
        }
        else {
            # Default to mgpp
            $buildertype = "mgppbuilder";
        }
    }

    # check for extension specific builders
    # (that will then be run after main builder.pm)
    my @builderdir_list = ($builderdir);
    my @buildertype_list = ($buildertype);

    if (defined $orthogonalbuildtypes) {
        foreach my $obt (@$orthogonalbuildtypes) {
            push(@builderdir_list, undef); # rely on @INC to find it
            push(@buildertype_list, $obt."Builder");
        }
    }

    # Set up array of the main builder.pm, followed by any ones
    # from the extension folders

    my $num_builders = scalar(@buildertype_list);
    my @builders = ();

    for (my $i = 0; $i < $num_builders; $i++) {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builderdir = $builderdir_list[$i];

        if ((defined $this_builderdir) && ($this_builderdir ne "")) {
            require "$this_builderdir/$this_buildertype.pm";
        }
        else {
            require "$this_buildertype.pm";
        }

        # Class method call on the class name held in $this_buildertype;
        # any exception raised by the constructor propagates to the caller.
        my $this_builder =
            $this_buildertype->new($site, $collection,
                                   $realarchivedir, $realbuilddir, $verbosity,
                                   $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                                   $remove_empty_classifications,
                                   $out, $no_text, $faillog, $gli);

        push(@builders, $this_builder);
    }

    # Init phase for builders (only started once all of them exist)
    for (my $i = 0; $i < $num_builders; $i++) {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builder = $builders[$i];

        $this_builder->init();
        $this_builder->set_maxnumeric($maxnumeric);

        if (($this_buildertype eq "mgppbuilder") && $no_strip_html) {
            $this_builder->set_strip_html(0);
        }

        if ($sections_index_document_metadata ne "never") {
            $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
        }

        if ($store_metadata_coverage) {
            $this_builder->set_store_metadata_coverage(1);
        }
    }

    # Run the requested passes. Each pass is run on every builder before
    # the next pass starts.

    if ($mode =~ /^all$/i) {
        foreach my $builder (@builders) { $builder->compress_text($textindex); }
        foreach my $builder (@builders) { $builder->build_indexes($indexname); }
        foreach my $builder (@builders) { $builder->make_infodatabase(); }
        foreach my $builder (@builders) { $builder->collect_specific(); }
    } elsif ($mode =~ /^compress_text$/i) {
        foreach my $builder (@builders) { $builder->compress_text($textindex); }
    } elsif ($mode =~ /^build_index$/i) {
        foreach my $builder (@builders) { $builder->build_indexes($indexname); }
    } elsif ($mode =~ /^infodb$/i) {
        foreach my $builder (@builders) { $builder->make_infodatabase(); }
    } else {
        &gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
        die;
    }

    if (!$debug) {
        foreach my $builder (@builders) { $builder->make_auxiliary_files(); }
    }
    foreach my $builder (@builders) { $builder->deinit(); }

    if (($realbuilddir ne $builddir) && !$debug) {
        &gsprintf($out, "{buildcol.copying_back_cached_build}\n")
            if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    # if buildcol.pl was run with -activate, need to run activate.pl
    # now that building's complete
    if ($activate) {
        my @activate_argv = ();
        push(@activate_argv, "-collectdir", $collectdir) if ($collectdir);
        push(@activate_argv, "-builddir", $builddir) if ($builddir);
        push(@activate_argv, "-site", $site) if ($site);
        push(@activate_argv, "-verbosity", $verbosity) if ($verbosity);
        push(@activate_argv, "-removeold") if ($removeold);
        push(@activate_argv, "-keepold") if ($keepold);
        push(@activate_argv, "-incremental") if ($incremental);
        my $quoted_argv = join(" ", map { "\"$_\"" } @activate_argv);

        # NOTE(review): the command goes through the shell as one string;
        # values containing double quotes would break the quoting.
        my $activatecol_cmd = "\"".&util::get_perl_exec()."\" -S activate.pl $quoted_argv \"$collection\"";

        my $activatecol_status = system($activatecol_cmd)/256;

        if ($activatecol_status != 0) {
            print STDERR "Error: Failed to run: $activatecol_cmd\n";
            print STDERR " $!\n" if ($! ne "");
            exit(-1);
        }
    }

    close OUT if $close_out;
    close FAILLOG;

    print STDERR "</Build>\n" if $gli;
}
724
725
726
Note: See TracBrowser for help on using the repository browser.