source: main/trunk/greenstone2/perllib/buildcolutils.pm@ 29096

Last change on this file since 29096 was 29096, checked in by kjdon, 10 years ago

New argument to print_txt_usage: pass 1 if you don't want the output paged. We use this when there has been an error and we are outputting the options before quitting the import/build. If the output is paged, the die doesn't get through to the top-level program. For example, in full-rebuild, if the import died because of a parsing error and the output had been paged, the import was stopped but the system return value was 0, so it would go on to the next stage and try to build. So now, if we are stopping because of an error, we don't page the output. Also added a few more (hopefully) helpful error messages.

  • Property svn:executable set to *
File size: 25.8 KB
RevLine 
[27304]1###############################################################################
2#
3# buildcolutils.pm -- index and build the collection. The buildtime counterpart
4# of inexport.pl
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package buildcolutils;
29
[28801]30#use strict;
31#no strict 'refs';
32
[28087]33use File::Basename;
34
[27304]35use colcfg;
36use dbutil;
37use util;
38use FileUtils;
39use scriptutil;
40use gsprintf;
41use printusage;
42use parse2;
43
44## @method new()
45#
46# Parses up and validates the arguments to the build process before creating
47# the appropriate build process to do the actual work
48#
49# @note Added true incremental support - John Thompson, DL Consulting Ltd.
50# @note There were several bugs regarding using directories other than
51# "import" or "archives" during import and build quashed. - John
52# Thompson, DL Consulting Ltd.
53#
54# @param $incremental If true indicates this build should not regenerate all
55# the index and metadata files, and should instead just
56# append the information found in the archives directory
57# to the existing files. If this requires some complex
58# work so as to correctly insert into a classifier so be
59# it. Of course none of this is done here - instead the
60# incremental argument is passed to the document
61# processor.
62#
sub new
{
  my $class = shift(@_);
  # $opt_listall_options is accepted for interface compatibility but is not
  # used (see the 'listall' note below).
  my ($argv, $options, $opt_listall_options) = @_;

  # Defaults for the fields this module relies on. The remaining fields
  # (language, out, gli, ...) are presumably filled in by parse2::parse()
  # from the argument definitions in $options->{'args'}.
  my $self = {'builddir' => undef,
              'buildtype' => undef,
              'close_faillog' => 0,
              'close_out' => 0,
              'mode' => '',
              'orthogonalbuildtypes' => undef,
              'realbuilddir' => undef,
              'textindex' => '',
              'xml' => 0
             };

  # general options available to all plugins
  my $arguments = $options->{'args'};
  my $intArgLeftinAfterParsing = &parse2::parse($argv, $arguments, $self, "allow_extra_options");
  # If parse returns -1 then something has gone wrong
  if ($intArgLeftinAfterParsing == -1)
  {
    # third argument 1 => do not page the usage text, so the die below
    # reaches the top level program
    &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    print STDERR "Something went wrong during parsing the arguments. Scroll up for details.\n";
    die "\n";
  }

  # If $language has been specified, load the appropriate resource bundle
  # (Otherwise, the default resource bundle will be loaded automatically)
  if ($self->{'language'} && $self->{'language'} =~ /\S/)
  {
    &gsprintf::load_language_specific_resource_bundle($self->{'language'});
  }

  # Do we need 'listall' support in buildcol? If so, copy code from inexport
  # later [jmt12]

  # -xml only asks for the option description: print it and return early,
  # before any collection name or output handle is required
  if ($self->{'xml'})
  {
    &PrintUsage::print_xml_usage($options);
    print "\n";
    return bless($self, $class);
  }

  # the gli wants strings to be in UTF-8
  # (this used to test an undeclared package variable $gli, which is never
  # set in this file, so the branch could not be taken)
  if ($self->{'gli'})
  {
    &gsprintf::output_strings_in_UTF8;
  }

  # If the user specified -h, then we output the usage
  if (@$argv && $argv->[0] =~ /^\-+h/)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}");
    die "\n";
  }

  # now check that we had exactly one leftover arg, which should be
  # the collection name. We don't want to do this earlier, cos
  # -xml arg doesn't need a collection name
  if ($intArgLeftinAfterParsing != 1)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    print STDERR "There should be one argument left after parsing the script args: the collection name.\n";
    die "\n";
  }

  # Anything other than STDERR/STDOUT is treated as a file name to write to.
  # The handle is kept as a package-qualified name so it can be passed
  # around as a plain string; deinit() closes it when 'close_out' is set.
  my $out = $self->{'out'};
  if ($out !~ /^(STDERR|STDOUT)$/i)
  {
    # three-argument open: the file name is never interpreted as a mode
    if (!open(OUT, '>', $out))
    {
      &gsprintf::gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
      # die unconditionally, whatever gsprintf returned
      die;
    }
    $out = "buildcolutils::OUT";
    $self->{'close_out'} = 1;
  }
  $out->autoflush(1);
  $self->{'out'} = $out;

  # @ARGV should be only one item, the name of the collection
  $self->{'collection'} = shift(@{$argv});

  return bless($self, $class);
}
145# new()
146
147# newCGI()?
148
149# @function get_collection
150#
sub get_collection
{
  # Accessor: hands back the collection name stored under 'collection'.
  my ($self) = @_;
  my $collection_name = $self->{'collection'};
  return $collection_name;
}
156# get_collection()
157
158# @function read_collection_cfg
159#
sub read_collection_cfg
{
  # Selects the collection, opens the fail log for appending and reads the
  # collection configuration file.
  #
  # Arguments: the collection name, and $options (only referenced by
  # the commented-out usage call below).
  # Returns:   ($config_filename, $collect_cfg) as produced by the colcfg
  #            helpers.
  # Dies if the collection cannot be used or the fail log cannot be opened.
  my $self = shift(@_);
  my ($collection, $options) = @_;

  my $collectdir = $self->{'collectdir'};
  my $site = $self->{'site'};
  my $out = $self->{'out'};

  # get and check the collection
  if (($collection = &colcfg::use_collection($site, $collection, $collectdir)) eq "")
  {
    #&PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    die "\n";
  }

  # set gs_version 2/3: a non-empty site means Greenstone 3
  $self->{'gs_version'} = "2";
  if ((defined $site) && ($site ne ""))
  {
    # gs3
    $self->{'gs_version'} = "3";
  }

  # add collection's perllib dir into include path in case we have collection
  # specific modules
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib'));
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'classify'));
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'plugins'));

  # check that we can open the faillog
  my $faillogname = $self->{'faillog'};
  if (!defined $faillogname || $faillogname eq "")
  {
    $faillogname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
  }
  # note that we're appending to the faillog here (import.pl clears it each time)
  # this could potentially create a situation where the faillog keeps being added
  # to over multiple builds (if the import process is being skipped)
  if (!open(FAILLOG, '>>', $faillogname))
  {
    &gsprintf::gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillogname);
    # die unconditionally, whatever gsprintf returned
    die;
  }
  # The handle is passed around as a package-qualified name; deinit() closes
  # it when 'close_faillog' is set.
  my $faillog = 'buildcolutils::FAILLOG';
  $faillog->autoflush(1);
  $self->{'faillog'} = $faillog;
  # keep the path of the log file, not the handle name that replaced it
  $self->{'faillogname'} = $faillogname;
  $self->{'close_faillog'} = 1;

  # Read in the collection configuration file.
  my $gs_mode = "gs".$self->{'gs_version'}; #gs2 or gs3
  my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
  my $collect_cfg = &colcfg::read_collection_cfg($config_filename, $gs_mode);

  return ($config_filename, $collect_cfg);
}
213# read_collection_cfg()
214
215# @function set_collection_options
216# This function copies across values for arguments from the collection
217# configuration file if they are not already provided by the user, then
218# sets reasonable defaults for any required arguments that remains without
219# a value.
sub set_collection_options
{
  # Fills in $self from the collection configuration ($collectcfg) wherever
  # the user supplied no usable value, then applies defaults. Values are
  # written back into $self; $collectcfg->{'infodbtype'} is also given a
  # default when missing.
  my $self = shift @_;
  my ($collectcfg) = @_;

  # If the infodbtype value wasn't defined in the collect.cfg file, use the default
  if (!defined($collectcfg->{'infodbtype'}))
  {
    $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
  }
  # - just so I don't have to pass collectcfg around as well
  $self->{'infodbtype'} = $collectcfg->{'infodbtype'};

  if ($self->{'verbosity'} !~ /\d+/)
  {
    if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/)
    {
      $self->{'verbosity'} = $collectcfg->{'verbosity'};
    }
    else
    {
      $self->{'verbosity'} = 2; # the default
    }
  }

  # we use searchtype for determining buildtype, but for old versions, use buildtype
  if (defined $collectcfg->{'buildtype'})
  {
    $self->{'buildtype'} = $collectcfg->{'buildtype'};
  }
  elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'})
  {
    $self->{'buildtype'} = "mgpp";
  }
  else
  {
    $self->{'buildtype'} = "mg"; #mg is the default
  }

  if ($self->{'buildtype'} eq "mgpp" && defined $collectcfg->{'textcompress'})
  {
    $self->{'textindex'} = $collectcfg->{'textcompress'};
  }

  # is it okay to always clobber or possible remain undefined? [jmt12]
  if (defined $collectcfg->{'orthogonalbuildtypes'})
  {
    $self->{'orthogonalbuildtypes'} = $collectcfg->{'orthogonalbuildtypes'};
  }

  # - resolve (and possibly set to default) archivedir
  if (defined $collectcfg->{'archivedir'} && $self->{'archivedir'} eq "")
  {
    $self->{'archivedir'} = $collectcfg->{'archivedir'};
  }
  # Modified so that the archivedir, if provided as an argument, is made
  # absolute if it isn't already
  if ($self->{'archivedir'} eq "")
  {
    $self->{'archivedir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "archives");
  }
  else
  {
    $self->{'archivedir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'archivedir'});
  }
  # End Mod
  $self->{'archivedir'} = &FileUtils::sanitizePath($self->{'archivedir'});

  # - resolve (and possibly set to default) builddir
  if (defined $collectcfg->{'builddir'} && $self->{'builddir'} eq "")
  {
    $self->{'builddir'} = $collectcfg->{'builddir'};
  }
  # Remember whether we fell back to the default, so the notice for
  # incremental builds can be printed once 'incremental' has been resolved
  # further down.
  my $using_default_builddir = 0;
  if ($self->{'builddir'} eq "")
  {
    $self->{'builddir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'building');
    $using_default_builddir = 1;
  }
  else
  {
    # make absolute if not already
    $self->{'builddir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'builddir'});
  }
  $self->{'builddir'} = &FileUtils::sanitizePath($self->{'builddir'});

  if (defined $collectcfg->{'cachedir'} && $self->{'cachedir'} eq "")
  {
    $self->{'cachedir'} = $collectcfg->{'cachedir'};
  }

  if ($self->{'maxdocs'} !~ /\-?\d+/)
  {
    if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/)
    {
      $self->{'maxdocs'} = $collectcfg->{'maxdocs'};
    }
    else
    {
      $self->{'maxdocs'} = -1; # the default
    }
  }

  # always clobbers? [jmt12]
  if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/)
  {
    $self->{'maxnumeric'} = $collectcfg->{'maxnumeric'};
  }
  # values outside 4..512 are replaced by 4
  if ($self->{'maxnumeric'} < 4 || $self->{'maxnumeric'} > 512)
  {
    $self->{'maxnumeric'} = 4;
  }

  if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i)
  {
    $self->{'debug'} = 1;
  }

  if ($self->{'mode'} !~ /^(all|compress_text|build_index|infodb|extra)$/)
  {
    if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb|extra)$/)
    {
      $self->{'mode'} = $collectcfg->{'mode'};
    }
    else
    {
      $self->{'mode'} = "all"; # the default
    }
  }

  # Presumably 'index' from the collect.cfg still works [jmt12]
  if (defined $collectcfg->{'index'} && $self->{'indexname'} eq "")
  {
    $self->{'indexname'} = $collectcfg->{'index'};
  }
  # - 'index' from the command line doesn't make it through parsing so I
  #   renamed this option 'indexname' [jmt12]
  if (defined $collectcfg->{'indexname'} && $self->{'indexname'} eq "")
  {
    $self->{'indexname'} = $collectcfg->{'indexname'};
  }
  # - we may also define the index level to build now [jmt12]
  if (defined $collectcfg->{'indexlevel'} && $self->{'indexlevel'} eq "")
  {
    $self->{'indexlevel'} = $collectcfg->{'indexlevel'};
  }

  # Boolean switches: a "true" in the configuration turns the option on
  # when the user has not already done so.
  foreach my $flag ('no_text', 'no_strip_html', 'store_metadata_coverage', 'remove_empty_classifications')
  {
    if (defined $collectcfg->{$flag} && $self->{$flag} == 0)
    {
      if ($collectcfg->{$flag} =~ /^true$/i)
      {
        $self->{$flag} = 1;
      }
    }
  }

  if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i)
  {
    $self->{'gli'} = 1;
  }
  if (!defined $self->{'gli'})
  {
    $self->{'gli'} = 0;
  }

  # Both section/document metadata options take the configuration value when
  # the user gave none, and fall back to 'never' for anything unrecognised.
  foreach my $option ('sections_index_document_metadata', 'sections_sort_on_document_metadata')
  {
    if ($self->{$option} !~ /\S/ && defined $collectcfg->{$option})
    {
      $self->{$option} = $collectcfg->{$option};
    }
    if ($self->{$option} !~ /^(never|always|unless_section_metadata_exists)$/)
    {
      $self->{$option} = 'never';
    }
  }

  my ($removeold, $keepold, $incremental, $incremental_mode)
      = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},
                                                 $self->{'incremental'}, 'building',
                                                 $collectcfg);
  $self->{'removeold'} = $removeold;
  $self->{'keepold'} = $keepold;
  $self->{'incremental'} = $incremental;
  $self->{'incremental_mode'} = $incremental_mode;

  # New argument to track whether build is incremental
  if (!defined $self->{'incremental'})
  {
    $self->{'incremental'} = 0;
  }

  # Tell the user when an incremental build is using the default building
  # directory. This check used to sit in the builddir section above, where
  # it read $incremental and $out before either was declared and so could
  # never print anything.
  if ($using_default_builddir && $self->{'incremental'})
  {
    &gsprintf::gsprintf($self->{'out'}, "{buildcol.incremental_default_builddir}\n");
  }

  #set the text index
  if (($self->{'buildtype'} eq 'mgpp') || ($self->{'buildtype'} eq 'lucene') || ($self->{'buildtype'} eq 'solr'))
  {
    if ($self->{'textindex'} eq '')
    {
      $self->{'textindex'} = 'text';
    }
  }
  else
  {
    $self->{'textindex'} = 'section:text';
  }
}
460# set_collection_options()
461
462# @function prepare_builders
463#
sub prepare_builders
{
  # Works out which builder class(es) to use, loads and constructs them,
  # runs their init phase and returns a reference to the list of builders.
  # Also records 'realarchivedir' and 'realbuilddir' in $self.
  my $self = shift @_;
  # accepted for interface compatibility; neither is used in this sub
  my ($config_filename,$collectcfg) = @_;

  my $archivedir = $self->{'archivedir'};
  my $builddir = $self->{'builddir'};
  my $buildtype = $self->{'buildtype'};
  my $cachedir = $self->{'cachedir'};
  my $collection = $self->{'collection'};
  my $debug = $self->{'debug'};
  my $faillog = $self->{'faillog'};
  my $gli = $self->{'gli'};
  my $incremental = $self->{'incremental'};
  my $incremental_mode = $self->{'incremental_mode'};
  my $keepold = $self->{'keepold'};
  my $maxdocs = $self->{'maxdocs'};
  my $maxnumeric = $self->{'maxnumeric'};
  my $no_strip_html = $self->{'no_strip_html'};
  my $no_text = $self->{'no_text'};
  my $orthogonalbuildtypes = $self->{'orthogonalbuildtypes'};
  my $out = $self->{'out'};
  my $remove_empty_classifications = $self->{'remove_empty_classifications'};
  my $sections_index_document_metadata = $self->{'sections_index_document_metadata'};
  my $sections_sort_on_document_metadata = $self->{'sections_sort_on_document_metadata'};
  my $site = $self->{'site'};
  my $store_metadata_coverage = $self->{'store_metadata_coverage'};
  my $verbosity = $self->{'verbosity'};

  if ($gli)
  {
    print STDERR "<Build>\n";
  }

  my ($realarchivedir, $realbuilddir);
  # update the archive cache if needed
  if ($cachedir)
  {
    if ($verbosity >= 1)
    {
      &gsprintf::gsprintf($out, "{buildcol.updating_archive_cache}\n");
    }

    $cachedir =~ s/[\\\/]+$//;
    # \Q..\E: the collection name is matched literally, not as a pattern
    if ($cachedir !~ /collect[\/\\]\Q$collection\E/)
    {
      $cachedir = &FileUtils::filenameConcatenate($cachedir, 'collect', $collection);
    }

    # build inside the cache; complete_builders() copies the result back
    $realarchivedir = &FileUtils::filenameConcatenate($cachedir, 'archives');
    $realbuilddir = &FileUtils::filenameConcatenate($cachedir, 'building');
    &FileUtils::makeAllDirectories($realarchivedir);
    &FileUtils::makeAllDirectories($realbuilddir);
    &FileUtils::synchronizeDirectory($archivedir, $realarchivedir, $verbosity);
  }
  else
  {
    $realarchivedir = $archivedir;
    $realbuilddir = $builddir;
  }
  $self->{'realarchivedir'} = $realarchivedir;
  $self->{'realbuilddir'} = $realbuilddir;

  # build it in realbuilddir
  &FileUtils::makeAllDirectories($realbuilddir);

  my ($buildertype, $builderdir);
  # if a builder class has been created for this collection, use it
  # otherwise, use the mg or mgpp builder
  if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
    $buildertype = "custombuilder";
  }
  elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    $buildertype = "custombuilder";
  }
  elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    $buildertype = $collection . 'builder';
  }
  else
  {
    $builderdir = undef;
    if (defined $buildtype && $buildtype ne '')
    {
      # caters for extension-based build types, such as 'solr'
      $buildertype = $buildtype . 'builder';
    }
    else
    {
      # Default to mgpp
      $buildertype = 'mgppbuilder';
    }
  }

  # check for extension specific builders
  # (that will then be run after main builder.pm)
  my @builderdir_list = ($builderdir);
  my @buildertype_list = ($buildertype);

  if ($self->{'mode'} eq "extra")
  {
    # knock out the main builder type, by resetting the lists to be empty
    @builderdir_list = ();
    @buildertype_list = ();
  }

  if (defined $orthogonalbuildtypes)
  {
    foreach my $obt (@$orthogonalbuildtypes)
    {
      push(@builderdir_list, undef); # rely on @INC to find it
      push(@buildertype_list, $obt."Builder");
    }
  }

  # Set up array of the main builder.pm, followed by any ones
  # from the extension folders
  my $num_builders = scalar(@buildertype_list);
  my @builders = ();

  for (my $i = 0; $i < $num_builders; $i++)
  {
    my $this_buildertype = $buildertype_list[$i];
    my $this_builderdir = $builderdir_list[$i];

    if ((defined $this_builderdir) && ($this_builderdir ne ""))
    {
      require "$this_builderdir/$this_buildertype.pm";
    }
    else
    {
      require "$this_buildertype.pm";
    }

    # Class-method call on the package name held in $this_buildertype. This
    # replaces a string eval of "new <type>(...)", which only parsed when
    # the name was a bare identifier. Failures are still re-thrown as a
    # plain string.
    my $this_builder = eval {
      $this_buildertype->new($site, $collection,
                             $realarchivedir, $realbuilddir, $verbosity,
                             $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                             $remove_empty_classifications,
                             $out, $no_text, $faillog, $gli);
    };
    die "$@" if $@;

    push(@builders, $this_builder);
  }

  # Init phase for builders (only once every builder has been constructed)
  for (my $i = 0; $i < $num_builders; $i++)
  {
    my $this_buildertype = $buildertype_list[$i];
    my $this_builder = $builders[$i];

    $this_builder->init();
    $this_builder->set_maxnumeric($maxnumeric);

    if (($this_buildertype eq "mgppbuilder") && $no_strip_html)
    {
      $this_builder->set_strip_html(0);
    }

    if ($sections_index_document_metadata ne "never")
    {
      $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
    }
    if (($this_buildertype eq "lucenebuilder" || $this_buildertype eq "solrbuilder") && $sections_sort_on_document_metadata ne "never")
    {
      $this_builder->set_sections_sort_on_document_metadata($sections_sort_on_document_metadata);
    }

    if ($store_metadata_coverage)
    {
      $this_builder->set_store_metadata_coverage(1);
    }
  }
  return \@builders;
}
650
sub build_collection
{
  # Runs the build passes selected by $self->{'mode'} on every builder.
  #
  # Argument: reference to the list of builders (as returned by
  #           prepare_builders()).
  # Modes:    all|extra    - compress_text, build_indexes, make_infodatabase
  #                          and collect_specific, each pass run over all
  #                          builders before the next pass starts
  #           compress_text, build_index, infodb - that single pass only
  # Dies on any other mode, after reporting it on STDERR.
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  my $indexlevel = $self->{'indexlevel'};
  my $indexname = $self->{'indexname'};
  my $mode = $self->{'mode'};
  my $textindex = $self->{'textindex'};

  # Plain loops with a lexical variable: the builders are only called, never
  # reassigned, so @builders is left untouched.
  if ($mode =~ /^(all|extra)$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->compress_text($textindex);
    }
    # - we pass the required indexname and indexlevel (if specified) to the
    #   processor [jmt12]
    foreach my $builder (@builders)
    {
      $builder->build_indexes($indexname, $indexlevel);
    }
    foreach my $builder (@builders)
    {
      $builder->make_infodatabase();
    }
    foreach my $builder (@builders)
    {
      $builder->collect_specific();
    }
  }
  elsif ($mode =~ /^compress_text$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->compress_text($textindex);
    }
  }
  elsif ($mode =~ /^build_index$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->build_indexes($indexname, $indexlevel);
    }
  }
  elsif ($mode =~ /^infodb$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->make_infodatabase();
    }
  }
  else
  {
    &gsprintf::gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
    # die unconditionally, whatever gsprintf returned
    die;
  }
}
696# build_collection()
697
698# @function build_auxiliary_files
699#
sub build_auxiliary_files
{
  # Asks every builder to write its auxiliary files; skipped entirely when
  # the 'debug' option is on.
  my ($self, $builders_ref) = @_;
  my @builders = @{$builders_ref};

  unless ($self->{'debug'})
  {
    foreach my $builder (@builders)
    {
      $builder->make_auxiliary_files();
    }
  }
}
709# build_auxiliary_files()
710
711# @function complete_builders
712#
sub complete_builders
{
  # Finishes a build: deinitialises every builder, copies a cached build
  # back into 'builddir' when the build ran elsewhere, and copies
  # rss-items.rdf from the archives directory into 'builddir' if present.
  #
  # Argument: reference to the list of builders.
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  foreach my $builder (@builders)
  {
    $builder->deinit();
  }

  # realbuilddir differs from builddir when prepare_builders() built inside
  # the cache directory
  if (($self->{'realbuilddir'} ne $self->{'builddir'}) && !$self->{'debug'})
  {
    if ($self->{'verbosity'} >= 1)
    {
      # use the configured output handle (this sub has no $out of its own)
      &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_back_cached_build}\n");
    }
    &FileUtils::removeFilesRecursive($self->{'builddir'});
    &FileUtils::copyFilesRecursive($self->{'realbuilddir'}, $self->{'builddir'});
  }

  # for RSS support: Need rss-items.rdf file in index folder
  # check if a file called rss-items.rdf exists in archives, then copy it into the building folder
  # so that when building is moved to index, this file will then also be in index as desired
  # NOTE(review): $collection_dir is not used below; the call is kept in case
  # resolve_collection_dir has effects callers rely on - confirm and remove.
  my $collection_dir = &util::resolve_collection_dir($self->{'collectdir'},
                                                     $self->{'collection'},
                                                     $self->{'site'});
  my $rss_items_rdf_file = &FileUtils::filenameConcatenate($self->{'archivedir'}, 'rss-items.rdf');
  # @todo FileUtils
  if (defined $self->{'builddir'} && &FileUtils::directoryExists($self->{'builddir'}) && &FileUtils::fileExists($rss_items_rdf_file))
  {
    if ($self->{'verbosity'} >= 1)
    {
      # only the last path component of each directory is shown, in quotes
      my $archivedir_tail = "'".basename($self->{'archivedir'})."'";
      my $builddir_tail = "'".basename($self->{'builddir'})."'";

      &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_rss_items_rdf}\n", $archivedir_tail, $builddir_tail);
    }
    &FileUtils::copyFiles($rss_items_rdf_file, $self->{'builddir'});
  }

  if ($self->{'gli'})
  {
    print STDERR "</Build>\n";
  }
}
755# complete_builders()
756
757# @function activate_collection
758#
sub activate_collection
{
  # If buildcol.pl was run with -activate, runs activate.pl now that
  # building is complete, forwarding the relevant options. Prints an error
  # and exits the process with -1 if activate.pl cannot be run or fails.
  my $self = shift(@_);
  return unless ($self->{'activate'});

  my @activate_argv = ();
  push(@activate_argv, '-collectdir', $self->{'collectdir'}) if ($self->{'collectdir'});
  push(@activate_argv, '-builddir', $self->{'builddir'}) if ($self->{'builddir'});
  push(@activate_argv, '-site', $self->{'site'}) if ($self->{'site'});
  # forward verbosity whenever one is set, including an explicit 0
  push(@activate_argv, '-verbosity', $self->{'verbosity'})
      if (defined $self->{'verbosity'} && $self->{'verbosity'} ne '');
  push(@activate_argv, '-removeold') if ($self->{'removeold'});
  push(@activate_argv, '-keepold') if ($self->{'keepold'});
  push(@activate_argv, '-incremental') if ($self->{'incremental'});

  # List-form system(): every argument reaches activate.pl exactly as given,
  # with no shell quoting or interpolation of paths and collection names.
  my @activatecol_cmd = (&util::get_perl_exec(), '-S', 'activate.pl',
                         @activate_argv, $self->get_collection());
  my $system_result = system(@activatecol_cmd);
  # capture both before anything else can overwrite them
  my ($wait_status, $os_error) = ($?, "$!");

  if ($system_result != 0)
  {
    # quoted rendering of the command, for the error message only
    my $activatecol_cmd = join(' ', map { "\"$_\"" } @activatecol_cmd);
    print STDERR "Error: Failed to run: $activatecol_cmd\n";
    if ($system_result == -1)
    {
      # the program could not be started at all; only then is $! meaningful
      print STDERR " $os_error\n" if ($os_error ne '');
    }
    elsif ($wait_status & 127)
    {
      print STDERR " terminated by signal " . ($wait_status & 127) . "\n";
    }
    else
    {
      print STDERR " exit status " . ($wait_status >> 8) . "\n";
    }
    exit(-1);
  }
}
787
788# @function deinit()
789#
sub deinit
{
  # Closes the output and fail-log handles, but only those flagged in $self
  # ('close_out' / 'close_faillog') as needing to be closed.
  my ($self) = @_;

  close(OUT) if ($self->{'close_out'});
  close(FAILLOG) if ($self->{'close_faillog'});
}
803# deinit()
804
8051;
Note: See TracBrowser for help on using the repository browser.