source: main/trunk/greenstone2/bin/script/buildcol.pl@ 26976

Last change on this file since 26976 was 26976, checked in by ak19, 11 years ago

First commit for RSS support: copying rss-items.rdf file across from archives to building, so that eventually it will be copied into the index dir as is required.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.0 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will build a particular collection.
30
31package buildcol;
32
# Compile-time setup: verify the Greenstone environment and put the
# Greenstone (and any extension) library directories at the front of @INC
# so that the 'use' statements further down can be resolved.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'} or die "GSDLOS not set\n";

    # Core library directories. Each one is unshifted in turn, so the last
    # entry listed here ends up first in @INC.
    foreach my $libdir ("perllib", "perllib/cpan", "perllib/cpan/XML/XPath",
                        "perllib/plugins", "perllib/classify") {
        unshift (@INC, "$ENV{'GSDLHOME'}/$libdir");
    }

    if (defined $ENV{'GSDL-RUN-SETUP'}) {
        require util;
        &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
    }

    # Extensions: each pair is (variable holding a colon-separated list of
    # extension names, variable holding the root under which "ext/" lives).
    foreach my $ext_source (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($names_var, $root_var) = @$ext_source;
        next unless defined $ENV{$names_var};

        foreach my $ext_name (split(/:/, $ENV{$names_var})) {
            my $ext_prefix = "$ENV{$root_var}/ext/$ext_name";

            foreach my $libdir ("perllib", "perllib/cpan",
                                "perllib/plugins", "perllib/classify") {
                unshift (@INC, "$ext_prefix/$libdir");
            }
        }
    }
}
71
72use colcfg;
73use dbutil;
74use util;
75use scriptutil;
76use FileHandle;
77use gsprintf;
78use printusage;
79use parse2;
80
81use strict;
82no strict 'refs'; # allow filehandles to be variables and vice versa
83no strict 'subs'; # allow barewords (eg STDERR) as function arguments
84
85
# Permitted values of the -mode option. Each entry pairs the value with the
# resource-bundle key of its description.
my $mode_list = [
    map { +{ 'name' => $_, 'desc' => "{buildcol.mode.$_}" } }
        qw(all compress_text build_index infodb)
];
95
# Permitted values of the -sections_index_document_metadata option. Each
# entry pairs the value with the resource-bundle key of its description.
my $sec_index_list = [
    map { +{ 'name' => $_,
             'desc' => "{buildcol.sections_index_document_metadata.$_}" } }
        qw(never always unless_section_metadata_exists)
];
104
# Command line options understood by buildcol.pl, in the table format that
# is handed to parse2::parse() and to the PrintUsage routines.
#
# Options that carry no 'deft' entry rely on the parser's own default
# (the original notes say the parser leaves "" as the default); main()
# then fills in the effective default for those.
my $arguments = [
    { name => 'remove_empty_classifications',
      desc => '{buildcol.remove_empty_classifications}',
      type => 'flag',   reqd => 'no', modegli => '2' },
    { name => 'archivedir',
      desc => '{buildcol.archivedir}',
      type => 'string', reqd => 'no', hiddengli => 'yes' },
    { name => 'builddir',
      desc => '{buildcol.builddir}',
      type => 'string', reqd => 'no', hiddengli => 'yes' },
    # The "cachedir" option ({buildcol.cachedir}, string, not required) is
    # deliberately disabled.
    { name => 'collectdir',
      desc => '{buildcol.collectdir}',
      type => 'string', reqd => 'no', hiddengli => 'yes' },
      # no 'deft': was &util::filename_cat ($ENV{'GSDLHOME'}, "collect")
    { name => 'site',
      desc => '{buildcol.site}',
      type => 'string', deft => '', reqd => 'no', hiddengli => 'yes' },
    { name => 'debug',
      desc => '{buildcol.debug}',
      type => 'flag',   reqd => 'no', hiddengli => 'yes' },
    { name => 'faillog',
      desc => '{buildcol.faillog}',
      type => 'string', reqd => 'no', modegli => '3' },
      # no 'deft': was &util::filename_cat("<collectdir>", "colname", "etc", "fail.log")
    { name => 'index',
      desc => '{buildcol.index}',
      type => 'string', reqd => 'no', modegli => '3' },
    { name => 'incremental',
      desc => '{buildcol.incremental}',
      type => 'flag',   hiddengli => 'yes' },
    { name => 'keepold',
      desc => '{buildcol.keepold}',
      type => 'flag',   reqd => 'no', hiddengli => 'yes' },
    { name => 'removeold',
      desc => '{buildcol.removeold}',
      type => 'flag',   reqd => 'no', hiddengli => 'yes' },
    { name => 'language',
      desc => '{scripts.language}',
      type => 'string', reqd => 'no', modegli => '3' },
    { name => 'maxdocs',
      desc => '{buildcol.maxdocs}',
      type => 'int',    reqd => 'no', hiddengli => 'yes' },
    { name => 'maxnumeric',
      desc => '{buildcol.maxnumeric}',
      type => 'int',    reqd => 'no', deft => '4', range => '4,512',
      modegli => '3' },
    { name => 'mode',
      desc => '{buildcol.mode}',
      type => 'enum',   list => $mode_list, reqd => 'no', modegli => '3' },
      # no 'deft': was "all"
    { name => 'no_strip_html',
      desc => '{buildcol.no_strip_html}',
      type => 'flag',   reqd => 'no', modegli => '3' },
    { name => 'store_metadata_coverage',
      desc => '{buildcol.store_metadata_coverage}',
      type => 'flag',   reqd => 'no', modegli => '3' },
    { name => 'no_text',
      desc => '{buildcol.no_text}',
      type => 'flag',   reqd => 'no', modegli => '2' },
    { name => 'sections_index_document_metadata',
      desc => '{buildcol.sections_index_document_metadata}',
      type => 'enum',   list => $sec_index_list, reqd => 'no',
      modegli => '2' },
    { name => 'out',
      desc => '{buildcol.out}',
      type => 'string', deft => 'STDERR', reqd => 'no', hiddengli => 'yes' },
    { name => 'verbosity',
      desc => '{buildcol.verbosity}',
      type => 'int',    reqd => 'no', modegli => '3' },
      # no 'deft': was "2"
    { name => 'gli',
      desc => '',
      type => 'flag',   reqd => 'no', hiddengli => 'yes' },
    { name => 'xml',
      desc => '{scripts.xml}',
      type => 'flag',   reqd => 'no', hiddengli => 'yes' },
    { name => 'activate',
      desc => '{buildcol.activate}',
      type => 'flag',   reqd => 'no', hiddengli => 'yes' },
];
246
# Script description passed to the PrintUsage routines: the script name,
# the resource-bundle key of its description, and its option table.
my $options = {
    'name' => "buildcol.pl",
    'desc' => "{buildcol.desc}",
    'args' => $arguments,
};
250
251
# File-scoped state shared with main():
#   $collection     - collection name, set from colcfg::use_collection()
#   $configfilename - path of the collection configuration file
#   $out            - output stream name; main() fills it in from the
#                     parsed -out option
my ($collection, $configfilename, $out);

# Which Greenstone flavour is being built for: "gs2" (the default) or "gs3"
my $gs_mode = "gs2";
259
## @method gsprintf()
# Convenience wrapper so that code in this package can call &gsprintf(...)
# directly: the whole argument list is forwarded, unchanged, to
# gsprintf::gsprintf() and that function's result is returned.
#
# As called in this file the arguments are:
#
# @param $handle The stream to write to (STDERR, or the name held in $out).
# @param $text   The string containing "{resource.key}" placeholders that
#                are looked up in the locale dependent strings file.
# @param ...     Optional extra values for the message (for example a file
#                name). NOTE(review): how these are substituted is decided
#                by gsprintf.pm - confirm there.
# @return Whatever gsprintf::gsprintf() returns.
#
# The sub is declared without a prototype. It previously had an empty
# prototype "()", which declares a zero-argument function and only worked
# because every caller used the prototype-bypassing &gsprintf(...) form.
#
sub gsprintf
{
    return &gsprintf::gsprintf(@_);
}
## gsprintf() ##
276
&main();

## @method main()
#
# Parses and validates the command line arguments, merges them with the
# settings found in the collection configuration file, creates the builder
# object(s) for the collection and runs the requested build passes. Once the
# build is complete it copies rss-items.rdf (if present) from the archives
# directory into the build directory and, when -activate was given, runs
# activate.pl.
#
# Takes no arguments; everything is read from @ARGV and %ENV. Dies (or exits
# with a non-zero status) on any unrecoverable error.
#
# @note Added true incremental support - John Thompson, DL Consulting Ltd.
# @note Several bugs regarding using directories other than "import" or
#       "archives" during import and build were fixed. - John Thompson,
#       DL Consulting Ltd.
# @note The -incremental option indicates that the build should not
#       regenerate all the index and metadata files but append the
#       information found in the archives directory to the existing files.
#       None of that work is done here; the flag is simply passed on to the
#       builder objects.
#
sub main
{
    # Command line args. The names of the variables that correspond to
    # options MUST match the option names in $arguments, because the parsed
    # values are assigned to them by name below ($out lives at file scope).
    my ($verbosity, $archivedir, $cachedir, $builddir, $site, $maxdocs,
        $debug, $mode, $indexname, $removeold, $keepold,
        $incremental, $incremental_mode,
        $remove_empty_classifications,
        $collectdir, $textindex,
        $no_strip_html, $store_metadata_coverage,
        $no_text, $faillog, $gli, $index, $language,
        $sections_index_document_metadata, $maxnumeric, $activate);

    my $xml = 0;
    my $hashParsingResult = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");

    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # Copy every parsed option into the variable of the same name. The
    # string eval is required because the targets are lexical variables.
    foreach my $strVariable (keys %$hashParsingResult) {
        eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
    }

    # The option is called "index", so the loop above fills $index, while
    # the rest of this sub works with $indexname. Carry the value across,
    # otherwise -index given on the command line is silently ignored.
    if (defined $index && $index ne "") {
        $indexname = $index;
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8;
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    $textindex = "";
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # Three-argument open: the file name is never interpreted as a mode.
        # Abort unconditionally on failure, whatever gsprintf returns.
        unless (open (OUT, '>', $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        }
        $out = "buildcol::OUT";
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection
    if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    unless (open (FAILLOG, '>>', $faillog)) {
        &gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog);
        die;
    }
    $faillog = 'buildcol::FAILLOG';
    $faillog->autoflush(1);

    # Collection specific perllib, classify and plugins directories take
    # precedence over the Greenstone-wide ones.
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/classify");
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/plugins");

    # Read in the collection configuration file.
    my ($collectcfg, $buildtype, $orthogonalbuildtypes);
    if ((defined $site) && ($site ne "")) { # GS3
        $gs_mode = "gs3";
    }
    $configfilename = &colcfg::get_collect_cfg_name($out, $gs_mode);
    $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode);

    # If the infodbtype value wasn't defined in the collect.cfg file, use the default
    if (!defined($collectcfg->{'infodbtype'})) {
        $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
    }

    # From here on: for each setting, a value given on the command line
    # wins, then the collection configuration, then the built-in default.
    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    # we use searchtype for determining buildtype, but for old versions, use buildtype
    if (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    } else {
        $buildtype = "mg"; #mg is the default
    }

    if (defined $collectcfg->{'orthogonalbuildtypes'}) {
        $orthogonalbuildtypes = $collectcfg->{'orthogonalbuildtypes'};
    }

    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    # There is no -cachedir option (it is disabled in $arguments), so
    # $cachedir can only come from the configuration file and is undefined
    # until then.
    if (defined $collectcfg->{'cachedir'} && (!defined $cachedir || $cachedir eq "")) {
        $cachedir = $collectcfg->{'cachedir'};
    }
    if (defined $collectcfg->{'builddir'} && $builddir eq "") {
        $builddir = $collectcfg->{'builddir'};
    }
    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        } else {
            $maxdocs = -1; # the default
        }
    }
    if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/) {
        $maxnumeric = $collectcfg->{'maxnumeric'};
    }

    # out-of-range values fall back to the documented default
    if ($maxnumeric < 4 || $maxnumeric > 512) {
        $maxnumeric = 4;
    }

    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
        if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
            $mode = $collectcfg->{'mode'};
        } else {
            $mode = "all"; # the default
        }
    }
    if (defined $collectcfg->{'index'} && (!defined $indexname || $indexname eq "")) {
        $indexname = $collectcfg->{'index'};
    }
    if (defined $collectcfg->{'no_text'} && $no_text == 0) {
        if ($collectcfg->{'no_text'} =~ /^true$/i) {
            $no_text = 1;
        }
    }
    if (defined $collectcfg->{'no_strip_html'} && $no_strip_html == 0) {
        if ($collectcfg->{'no_strip_html'} =~ /^true$/i) {
            $no_strip_html = 1;
        }
    }
    if (defined $collectcfg->{'store_metadata_coverage'} && $store_metadata_coverage == 0) {
        if ($collectcfg->{'store_metadata_coverage'} =~ /^true$/i) {
            $store_metadata_coverage = 1;
        }
    }
    if (defined $collectcfg->{'remove_empty_classifications'} && $remove_empty_classifications == 0) {
        if ($collectcfg->{'remove_empty_classifications'} =~ /^true$/i) {
            $remove_empty_classifications = 1;
        }
    }

    if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
        $textindex = $collectcfg->{'textcompress'};
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $gli = 1;
    }

    if ($sections_index_document_metadata !~ /\S/ && defined $collectcfg->{'sections_index_document_metadata'}) {
        $sections_index_document_metadata = $collectcfg->{'sections_index_document_metadata'};
    }

    if ($sections_index_document_metadata !~ /^(never|always|unless_section_metadata_exists)$/) {
        $sections_index_document_metadata = "never";
    }

    ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                   $incremental, "building",
                                                   $collectcfg);

    $gli = 0 unless defined $gli;

    # New argument to track whether build is incremental
    $incremental = 0 unless defined $incremental;

    print STDERR "<Build>\n" if $gli;

    # set the text index
    if (($buildtype eq "mgpp") || ($buildtype eq "lucene") || ($buildtype eq "solr")) {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /

    my ($realarchivedir, $realbuilddir);
    # The archivedir, if provided as an argument, is made absolute if it
    # isn't already
    if ($archivedir eq "") {
        $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
    }
    else {
        $archivedir = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $archivedir);
    }
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;

    if ($builddir eq "") {
        $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building");
        if ($incremental) {
            &gsprintf($out, "{buildcol.incremental_default_builddir}\n");
        }
    }
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
        &gsprintf($out, "{buildcol.updating_archive_cache}\n")
            if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q...\E: the collection name is literal text, not a pattern
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);

    my ($buildertype, $builderdir);
    # if a builder class has been created for this collection, use it
    # otherwise, use the builder that matches the build type
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildertype = "custombuilder";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "custombuilder";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = undef;
        if ($buildtype ne "") {
            # caters for extension-based build types, such as 'solr'
            $buildertype = $buildtype."builder";
        }
        else {
            # Default to mgpp
            $buildertype = "mgppbuilder";
        }
    }

    # The main builder comes first, followed by any extension specific
    # ("orthogonal") builders, which are run after the main one.
    my @builderdir_list = ($builderdir);
    my @buildertype_list = ($buildertype);

    if (defined $orthogonalbuildtypes) {
        foreach my $obt (@$orthogonalbuildtypes) {
            push(@builderdir_list,undef); # rely on @INC to find it
            push(@buildertype_list,$obt."Builder");
        }
    }

    # Load and construct each builder
    my @builders = ();

    foreach my $i (0 .. $#buildertype_list) {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builderdir = $builderdir_list[$i];

        if ((defined $this_builderdir) && ($this_builderdir ne "")) {
            require "$this_builderdir/$this_buildertype.pm";
        }
        else {
            require "$this_buildertype.pm";
        }

        # The class name is only known at run time; a method call on the
        # name does the job without needing a string eval.
        my $this_builder = $this_buildertype->new($site, $collection,
                                                  $realarchivedir, $realbuilddir, $verbosity,
                                                  $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                                                  $remove_empty_classifications,
                                                  $out, $no_text, $faillog, $gli);

        push(@builders,$this_builder);
    }

    # Init phase for builders
    foreach my $i (0 .. $#builders) {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builder = $builders[$i];

        $this_builder->init();
        $this_builder->set_maxnumeric($maxnumeric);

        if (($this_buildertype eq "mgppbuilder") && $no_strip_html) {
            $this_builder->set_strip_html(0);
        }

        if ($sections_index_document_metadata ne "never") {
            $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
        }

        if ($store_metadata_coverage) {
            $this_builder->set_store_metadata_coverage(1);
        }
    }

    # Calls the named method, with the given arguments, on every builder in
    # turn. A named lexical loop variable is used (rather than $_) so that a
    # method which changes $_ cannot alter the contents of @builders.
    my $run_on_all_builders = sub {
        my ($method, @method_args) = @_;
        foreach my $builder (@builders) {
            $builder->$method(@method_args);
        }
    };

    # Run the requested passes. Each pass is completed for all builders
    # before the next pass starts.
    if ($mode =~ /^all$/i) {
        $run_on_all_builders->('compress_text', $textindex);
        $run_on_all_builders->('build_indexes', $indexname);
        $run_on_all_builders->('make_infodatabase');
        $run_on_all_builders->('collect_specific');
    } elsif ($mode =~ /^compress_text$/i) {
        $run_on_all_builders->('compress_text', $textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $run_on_all_builders->('build_indexes', $indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $run_on_all_builders->('make_infodatabase');
    } else {
        &gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
        die;
    }

    if (!$debug) {
        $run_on_all_builders->('make_auxiliary_files');
    }
    $run_on_all_builders->('deinit');

    if (($realbuilddir ne $builddir) && !$debug) {
        &gsprintf($out, "{buildcol.copying_back_cached_build}\n")
            if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    # for RSS support: Need rss-items.rdf file in index folder
    # check if a file called rss-items.rdf exists in archives, then copy it into the building folder
    # so that when building is moved to index, this file will then also be in index as desired
    # NOTE(review): the value returned here is not used anywhere below; the
    # call is kept in case resolve_collection_dir has side effects - confirm
    # against util.pm before removing it.
    my $collection_dir = &util::resolve_collection_dir($collectdir, $collection, $site);
    my $rss_items_rdf_file = &util::filename_cat($archivedir, "rss-items.rdf");
    if (defined $builddir && -d $builddir && -f $rss_items_rdf_file) {
        &gsprintf($out, "{buildcol.copying_rss_items_rdf}\n") if ($verbosity >= 1);
        &util::cp ($rss_items_rdf_file, $builddir);
    }

    # if buildcol.pl was run with -activate, need to run activate.pl
    # now that building's complete
    if ($activate) {
        # Pass on only the options that activate.pl shares with this script
        my @activate_argv = ();
        push(@activate_argv,"-collectdir",$collectdir) if($collectdir);
        push(@activate_argv,"-builddir",$builddir) if($builddir);
        push(@activate_argv,"-site",$site) if($site);
        push(@activate_argv,"-verbosity",$verbosity) if($verbosity);
        push(@activate_argv,"-removeold") if($removeold);
        push(@activate_argv,"-keepold") if($keepold);
        push(@activate_argv,"-incremental") if($incremental);
        my $quoted_argv = join(" ", map { "\"$_\"" } @activate_argv);

        # NOTE(review): the command is run through the shell and the values
        # are only wrapped in double quotes; a value that itself contains a
        # double quote would break the command line.
        my $activatecol_cmd = "\"".&util::get_perl_exec()."\" -S activate.pl $quoted_argv \"$collection\"";

        my $activatecol_status = system($activatecol_cmd)/256;

        if ($activatecol_status != 0) {
            print STDERR "Error: Failed to run: $activatecol_cmd\n";
            print STDERR " $!\n" if ($! ne "");
            exit(-1);
        }
    }

    close OUT if $close_out;
    close FAILLOG;

    print STDERR "</Build>\n" if $gli;
}
735
736
737
Note: See TracBrowser for help on using the repository browser.