source: main/trunk/greenstone2/perllib/buildcolutils.pm@ 32343

Last change on this file since 32343 was 31753, checked in by ak19, 7 years ago

Two fixes Kathy requested: 1. when running buildcol, ONLY deactivate a collection surrounding the lock-sensitive make_infodatabase() calls IF incremental. 2. Allow buildcol.pl to accept activate parameters like library_url (library_name and skipactivation). full-(re)build and incremental-(re)build scripts already accept additional parameters such as with -activate:skipactivation, but buildcol.pl doesn't work that way.

  • Property svn:executable set to *
File size: 27.7 KB
Line 
1##############################################################################
2#
3# buildcolutils.pm -- index and build the collection. The buildtime counterpart
4# of inexport.pl
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package buildcolutils;
29
30#use strict;
31#no strict 'refs';
32
33use File::Basename;
34
35use colcfg;
36use dbutil;
37use util;
38use FileUtils;
39use scriptutil;
40use servercontrol;
41use gsprintf;
42use printusage;
43use parse2;
44
45## @method new()
46#
47# Parses up and validates the arguments to the build process before creating
48# the appropriate build process to do the actual work
49#
50# @note Added true incremental support - John Thompson, DL Consulting Ltd.
51# @note There were several bugs regarding using directories other than
52# "import" or "archives" during import and build quashed. - John
53# Thompson, DL Consulting Ltd.
54#
55# @param $incremental If true indicates this build should not regenerate all
56# the index and metadata files, and should instead just
57# append the information found in the archives directory
58# to the existing files. If this requires some complex
59# work so as to correctly insert into a classifier so be
60# it. Of course none of this is done here - instead the
61# incremental argument is passed to the document
62# processor.
63#
sub new
{
    # Constructor: parses the buildcol command line arguments into a new
    # object and records the collection name.
    #
    # @param $argv    reference to the argument list; the parsed options are
    #                 consumed from it and the one remaining item is taken
    #                 as the collection name
    # @param $options option description hash; its 'args' entry is handed
    #                 to the argument parser
    # @param $opt_listall_options currently unused in this constructor
    # @return the blessed object. When -xml is given only the XML usage is
    #         printed and the object is returned without a collection.
    # Dies (after printing usage) on a parse failure, on -h, or when the
    # number of leftover arguments is not exactly one.
    my $class = shift(@_);
    my ($argv, $options, $opt_listall_options) = @_;

    # Fields that must exist whether or not the parser supplies them
    my $self = {'builddir' => undef,
                'buildtype' => undef,
                'close_faillog' => 0,
                'close_out' => 0,
                'mode' => '',
                'orthogonalbuildtypes' => undef,
                'realbuilddir' => undef,
                'textindex' => '',
                'xml' => 0
               };

    # general options available to all plugins
    # (presumably parse2::parse stores each parsed option in $self -- the
    # code below reads 'language', 'xml', 'gli' and 'out' from it)
    my $arguments = $options->{'args'};
    my $intArgLeftinAfterParsing = &parse2::parse($argv, $arguments, $self, "allow_extra_options");
    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1)
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}",1);
        print STDERR "Something went wrong during parsing the arguments. Scroll up for details.\n";
        die "\n";
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($self->{'language'} && $self->{'language'} =~ /\S/)
    {
        &gsprintf::load_language_specific_resource_bundle($self->{'language'});
    }

    # Do we need 'listall' support in buildcol? If so, copy code from inexport
    # later [jmt12]

    # -xml only asks for a machine readable description of the options, so
    # no collection name is needed and we can return straight away
    if ($self->{'xml'})
    {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return bless($self, $class);
    }

    # the gli wants strings to be in UTF-8
    # (the flag lives in $self; there is no $gli variable in this scope)
    if ($self->{'gli'})
    {
        &gsprintf::output_strings_in_UTF8;
    }

    # If the user specified -h, then we output the usage
    if (@$argv && $argv->[0] =~ /^\-+h/)
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    if ($intArgLeftinAfterParsing != 1)
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
        print STDERR "There should be one argument left after parsing the script args: the collection name.\n";
        die "\n";
    }

    # Anything other than STDERR/STDOUT is treated as a file name to write to
    my $out = $self->{'out'};
    if ($out !~ /^(STDERR|STDOUT)$/i)
    {
        # three-argument open so the file name can never be read as a mode
        unless (open(OUT, '>', $out))
        {
            &gsprintf::gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        }
        $out = "buildcolutils::OUT";
        $self->{'close_out'} = 1;
    }
    $out->autoflush(1);
    $self->{'out'} = $out;

    # @ARGV should be only one item, the name of the collection
    $self->{'collection'} = shift(@{$argv});

    return bless($self, $class);
}
146# new()
147
148# newCGI()?
149
150# @function get_collection
151#
sub get_collection
{
    # Accessor: the collection name stored on this object.
    my ($self) = @_;
    return $self->{'collection'};
}
157# get_collection()
158
159# @function read_collection_cfg
160#
sub read_collection_cfg
{
    # Selects the collection, opens the fail log for appending and reads the
    # collection configuration file.
    #
    # @param $collection name of the collection to use
    # @param $options    accepted but not used here
    # @return ($config_filename, $collect_cfg) as returned by colcfg
    # Side effects: sets 'gs_version', 'faillog' (handle name),
    # 'faillogname' (file name) and 'close_faillog' on $self, and adds the
    # collection's perllib directories to the include path.
    # Dies if the collection cannot be used or the fail log cannot be opened.
    my $self = shift(@_);
    my ($collection, $options) = @_;

    my $collectdir = $self->{'collectdir'};
    my $site = $self->{'site'};
    my $out = $self->{'out'};

    # get and check the collection
    $collection = &colcfg::use_collection($site, $collection, $collectdir);
    if ($collection eq "")
    {
        #&PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
        die "\n";
    }

    # set gs_version 2/3: a non-empty site means gs3
    if ((defined $site) && ($site ne ""))
    {
        $self->{'gs_version'} = "3";
    }
    else
    {
        $self->{'gs_version'} = "2";
    }

    # add collection's perllib dir into include path in case we have collection
    # specific modules
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib'));
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'classify'));
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'plugins'));

    # check that we can open the faillog
    my $faillog = $self->{'faillog'};
    if (!defined $faillog || $faillog eq "")
    {
        $faillog = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # remember the file name before $faillog is replaced by the handle name
    my $faillogname = $faillog;

    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    unless (open(FAILLOG, '>>', $faillog))
    {
        &gsprintf::gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog);
        die;
    }
    $faillog = 'buildcolutils::FAILLOG';
    $faillog->autoflush(1);
    $self->{'faillog'} = $faillog;
    $self->{'faillogname'} = $faillogname;
    $self->{'close_faillog'} = 1;

    # Read in the collection configuration file.
    my $gs_mode = "gs".$self->{'gs_version'}; #gs2 or gs3
    my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
    my $collect_cfg = &colcfg::read_collection_cfg($config_filename, $gs_mode);

    return ($config_filename, $collect_cfg);
}
214# read_collection_cfg()
215
216# @function set_collection_options
217# This function copies across values for arguments from the collection
218# configuration file if they are not already provided by the user, then
219# sets reasonable defaults for any required arguments that remains without
220# a value.
sub set_collection_options
{
    # Fills in build options on $self: values already given by the user are
    # kept, otherwise the value from the collection configuration is used,
    # otherwise a default.
    #
    # @param $collectcfg hash reference holding the collection configuration
    #                    (its 'infodbtype' is filled in if missing)
    my $self = shift @_;
    my ($collectcfg) = @_;

    # If the infodbtype value wasn't defined in the collect.cfg file, use the default
    if (!defined($collectcfg->{'infodbtype'}))
    {
        $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
    }
    # - just so I don't have to pass collectcfg around as well
    $self->{'infodbtype'} = $collectcfg->{'infodbtype'};

    if ($self->{'verbosity'} !~ /\d+/)
    {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/)
        {
            $self->{'verbosity'} = $collectcfg->{'verbosity'};
        }
        else
        {
            $self->{'verbosity'} = 2; # the default
        }
    }

    # we use searchtype for determining buildtype, but for old versions, use buildtype
    if (defined $collectcfg->{'buildtype'})
    {
        $self->{'buildtype'} = $collectcfg->{'buildtype'};
    }
    elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'})
    {
        $self->{'buildtype'} = "mgpp";
    }
    else
    {
        $self->{'buildtype'} = "mg"; #mg is the default
    }

    if ($self->{'buildtype'} eq "mgpp" && defined $collectcfg->{'textcompress'})
    {
        $self->{'textindex'} = $collectcfg->{'textcompress'};
    }

    # is it okay to always clobber or possible remain undefined? [jmt12]
    if (defined $collectcfg->{'orthogonalbuildtypes'})
    {
        $self->{'orthogonalbuildtypes'} = $collectcfg->{'orthogonalbuildtypes'};
    }

    # - resolve (and possibly set to default) archivedir
    if (defined $collectcfg->{'archivedir'} && $self->{'archivedir'} eq "")
    {
        $self->{'archivedir'} = $collectcfg->{'archivedir'};
    }
    # Modified so that the archivedir, if provided as an argument, is made
    # absolute if it isn't already
    if ($self->{'archivedir'} eq "")
    {
        $self->{'archivedir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "archives");
    }
    else
    {
        $self->{'archivedir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'archivedir'});
    }
    # End Mod
    $self->{'archivedir'} = &FileUtils::sanitizePath($self->{'archivedir'});

    # - resolve (and possibly set to default) builddir
    if (defined $collectcfg->{'builddir'} && $self->{'builddir'} eq "")
    {
        $self->{'builddir'} = $collectcfg->{'builddir'};
    }
    if ($self->{'builddir'} eq "")
    {
        $self->{'builddir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'building');
        # Tell the user which directory an incremental build falls back to.
        # The flag and the output handle are read from $self: the lexical
        # $incremental further down is not yet in scope here.
        if ($self->{'incremental'})
        {
            &gsprintf::gsprintf($self->{'out'}, "{buildcol.incremental_default_builddir}\n");
        }
    }
    else
    {
        # make absolute if not already
        $self->{'builddir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'builddir'});
    }
    $self->{'builddir'} = &FileUtils::sanitizePath($self->{'builddir'});

    if (defined $collectcfg->{'cachedir'} && $self->{'cachedir'} eq "")
    {
        $self->{'cachedir'} = $collectcfg->{'cachedir'};
    }

    if ($self->{'maxdocs'} !~ /\-?\d+/)
    {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/)
        {
            $self->{'maxdocs'} = $collectcfg->{'maxdocs'};
        }
        else
        {
            $self->{'maxdocs'} = -1; # the default
        }
    }

    # always clobbers? [jmt12]
    if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/)
    {
        $self->{'maxnumeric'} = $collectcfg->{'maxnumeric'};
    }
    # values outside 4..512 are reset to 4
    if ($self->{'maxnumeric'} < 4 || $self->{'maxnumeric'} > 512)
    {
        $self->{'maxnumeric'} = 4;
    }

    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i)
    {
        $self->{'debug'} = 1;
    }

    if ($self->{'mode'} !~ /^(all|compress_text|build_index|infodb|extra)$/)
    {
        if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb|extra)$/)
        {
            $self->{'mode'} = $collectcfg->{'mode'};
        }
        else
        {
            $self->{'mode'} = "all"; # the default
        }
    }

    # Presumably 'index' from the collect.cfg still works [jmt12]
    if (defined $collectcfg->{'index'} && $self->{'indexname'} eq "")
    {
        $self->{'indexname'} = $collectcfg->{'index'};
    }
    # - 'index' from the command line doesn't make it through parsing so I
    #   renamed this option 'indexname' [jmt12]
    if (defined $collectcfg->{'indexname'} && $self->{'indexname'} eq "")
    {
        $self->{'indexname'} = $collectcfg->{'indexname'};
    }
    # - we may also define the index level to build now [jmt12]
    if (defined $collectcfg->{'indexlevel'} && $self->{'indexlevel'} eq "")
    {
        $self->{'indexlevel'} = $collectcfg->{'indexlevel'};
    }

    # Boolean switches: only turned on from the configuration ("true") when
    # they are currently off
    foreach my $flag ('no_text', 'no_strip_html', 'store_metadata_coverage', 'remove_empty_classifications')
    {
        if (defined $collectcfg->{$flag} && $self->{$flag} == 0)
        {
            if ($collectcfg->{$flag} =~ /^true$/i)
            {
                $self->{$flag} = 1;
            }
        }
    }

    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i)
    {
        $self->{'gli'} = 1;
    }
    if (!defined $self->{'gli'})
    {
        $self->{'gli'} = 0;
    }

    # Section metadata policies: take the configured value when none was
    # given, then fall back to 'never' for anything unrecognised
    foreach my $policy ('sections_index_document_metadata', 'sections_sort_on_document_metadata')
    {
        if ($self->{$policy} !~ /\S/ && defined $collectcfg->{$policy})
        {
            $self->{$policy} = $collectcfg->{$policy};
        }
        if ($self->{$policy} !~ /^(never|always|unless_section_metadata_exists)$/)
        {
            $self->{$policy} = 'never';
        }
    }

    my ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},
                                                   $self->{'incremental'}, 'building',
                                                   $collectcfg);
    $self->{'removeold'} = $removeold;
    $self->{'keepold'} = $keepold;
    $self->{'incremental'} = $incremental;
    $self->{'incremental_mode'} = $incremental_mode;

    # New argument to track whether build is incremental
    if (!defined $self->{'incremental'})
    {
        $self->{'incremental'} = 0;
    }

    #set the text index
    if (($self->{'buildtype'} eq 'mgpp') || ($self->{'buildtype'} eq 'lucene') || ($self->{'buildtype'} eq 'solr'))
    {
        if ($self->{'textindex'} eq '')
        {
            $self->{'textindex'} = 'text';
        }
    }
    else
    {
        $self->{'textindex'} = 'section:text';
    }
}
461# set_collection_options()
462
463# @function prepare_builders
464#
sub prepare_builders
{
    # Works out the real archive/build directories, chooses the builder
    # class(es) for this collection, constructs them and runs their init
    # phase.
    #
    # @param $config_filename accepted but not used here
    # @param $collectcfg      accepted but not used here
    # @return reference to the list of initialised builder objects (the main
    #         builder first, unless mode is "extra", then one per orthogonal
    #         build type)
    # Side effects: sets 'realarchivedir' and 'realbuilddir' on $self and
    # creates the real build directory.
    my $self = shift @_;
    my ($config_filename, $collectcfg) = @_;

    my $archivedir = $self->{'archivedir'};
    my $builddir = $self->{'builddir'};
    my $buildtype = $self->{'buildtype'};
    my $cachedir = $self->{'cachedir'};
    my $collection = $self->{'collection'};
    my $debug = $self->{'debug'};
    my $faillog = $self->{'faillog'};
    my $gli = $self->{'gli'};
    my $incremental = $self->{'incremental'};
    my $incremental_mode = $self->{'incremental_mode'};
    my $keepold = $self->{'keepold'};
    my $maxdocs = $self->{'maxdocs'};
    my $maxnumeric = $self->{'maxnumeric'};
    my $mode = $self->{'mode'};
    my $no_strip_html = $self->{'no_strip_html'};
    my $no_text = $self->{'no_text'};
    my $orthogonalbuildtypes = $self->{'orthogonalbuildtypes'};
    my $out = $self->{'out'};
    my $remove_empty_classifications = $self->{'remove_empty_classifications'};
    my $sections_index_document_metadata = $self->{'sections_index_document_metadata'};
    my $sections_sort_on_document_metadata = $self->{'sections_sort_on_document_metadata'};
    my $site = $self->{'site'};
    my $store_metadata_coverage = $self->{'store_metadata_coverage'};
    my $verbosity = $self->{'verbosity'};

    if ($gli)
    {
        print STDERR "<Build>\n";
    }

    my ($realarchivedir, $realbuilddir);
    # update the archive cache if needed
    if ($cachedir)
    {
        if ($verbosity >= 1)
        {
            &gsprintf::gsprintf($out, "{buildcol.updating_archive_cache}\n")
        }

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: the collection name is matched literally, so characters
        # such as '.' or '+' in it are not treated as regex operators
        if ($cachedir !~ /collect[\/\\]\Q$collection\E/)
        {
            $cachedir = &FileUtils::filenameConcatenate($cachedir, 'collect', $collection);
        }

        $realarchivedir = &FileUtils::filenameConcatenate($cachedir, 'archives');
        $realbuilddir = &FileUtils::filenameConcatenate($cachedir, 'building');
        &FileUtils::makeAllDirectories($realarchivedir);
        &FileUtils::makeAllDirectories($realbuilddir);
        &FileUtils::synchronizeDirectory($archivedir, $realarchivedir, $verbosity);
    }
    else
    {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }
    $self->{'realarchivedir'} = $realarchivedir;
    $self->{'realbuilddir'} = $realbuilddir;

    # build it in realbuilddir
    &FileUtils::makeAllDirectories($realbuilddir);

    my ($buildertype, $builderdir);
    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildertype = "custombuilder";
    }
    elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "custombuilder";
    }
    elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = $collection . 'builder';
    }
    else
    {
        $builderdir = undef;
        if ($buildtype ne '')
        {
            # caters for extension-based build types, such as 'solr'
            $buildertype = $buildtype . 'builder';
        }
        else
        {
            # Default to mgpp
            $buildertype = 'mgppbuilder';
        }
    }

    # check for extension specific builders
    # (that will then be run after main builder.pm)
    my @builderdir_list = ($builderdir);
    my @buildertype_list = ($buildertype);

    if ($mode eq "extra")
    {
        # knock out the main builder type, by reseting the lists to be empty
        @builderdir_list = ();
        @buildertype_list = ();
    }

    if (defined $orthogonalbuildtypes)
    {
        foreach my $obt (@$orthogonalbuildtypes)
        {
            push(@builderdir_list, undef); # rely on @INC to find it
            push(@buildertype_list, $obt."Builder");
        }
    }

    # Set up array of the main builder.pm, followed by any ones
    # from the extension folders
    my $num_builders = scalar(@buildertype_list);
    my @builders = ();

    for (my $i = 0; $i < $num_builders; $i++)
    {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builderdir = $builderdir_list[$i];

        if ((defined $this_builderdir) && ($this_builderdir ne ""))
        {
            require "$this_builderdir/$this_buildertype.pm";
        }
        else
        {
            require "$this_buildertype.pm";
        }

        # The class name is only known at run time; calling new through the
        # name held in the variable avoids building code as a string. Any
        # error raised by the constructor propagates to our caller.
        my $this_builder = $this_buildertype->new($site, $collection,
                                                  $realarchivedir, $realbuilddir, $verbosity,
                                                  $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                                                  $remove_empty_classifications,
                                                  $out, $no_text, $faillog, $gli);

        push(@builders, $this_builder);
    }

    # Init phase for builders
    for (my $i = 0; $i < $num_builders; $i++)
    {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builder = $builders[$i];

        $this_builder->init();
        $this_builder->set_maxnumeric($maxnumeric);

        if (($this_buildertype eq "mgppbuilder") && $no_strip_html)
        {
            $this_builder->set_strip_html(0);
        }

        if ($sections_index_document_metadata ne "never")
        {
            $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
        }
        if (($this_buildertype eq "lucenebuilder" || $this_buildertype eq "solrbuilder") && $sections_sort_on_document_metadata ne "never")
        {
            $this_builder->set_sections_sort_on_document_metadata($sections_sort_on_document_metadata);
        }

        if ($store_metadata_coverage)
        {
            $this_builder->set_store_metadata_coverage(1);
        }
    }
    return \@builders;
}
651
# Private helper for build_collection(): runs make_infodatabase() on every
# builder that supports it. When the build is incremental and the builder's
# infodb type does not support concurrent read and write (e.g. gdbm), the
# collection is deactivated through $gsserver first.
sub _make_infodatabases
{
    my ($self, $gsserver, $builders_ref) = @_;

    foreach my $builder (@$builders_ref)
    {
        next unless $builder->supports_make_infodatabase();

        my $infodbtype = $builder->{'infodbtype'};
        my $dbSupportsConcurrentRW = &dbutil::supportsConcurrentReadAndWrite($infodbtype);

        if (!$dbSupportsConcurrentRW && $self->{'incremental'})
        {
            $gsserver->print_task_msg("About to deactivate collection ".$self->get_collection());
            $gsserver->do_deactivate();
        }
        $builder->make_infodatabase();
    }
}

sub build_collection
{
    # Runs the build passes selected by 'mode' on every builder.
    #
    # @param $builders_ref reference to the list of builder objects
    # Modes: all/extra run every pass; compress_text, build_index and infodb
    # run just that pass. Any other mode prints an error and dies.
    my $self = shift(@_);
    my @builders = @{shift(@_)};

    my $indexlevel = $self->{'indexlevel'};
    my $indexname = $self->{'indexname'};
    my $mode = $self->{'mode'};
    my $textindex = $self->{'textindex'};

    # Run the requested passes
    if ($mode =~ /^(all|extra)$/i)
    {
        foreach my $builder (@builders)
        {
            $builder->compress_text($textindex);
        }
        # - we pass the required indexname and indexlevel (if specified) to the
        #   processor [jmt12]
        foreach my $builder (@builders)
        {
            $builder->build_indexes($indexname, $indexlevel);
        }

        # If incremental, need to deactivate the collection for collections whose db don't support concurrent R+W
        # All except the collection (1st parameter) can be empty. For GS3, also set the site parameter
        my $gsserver = servercontrol->new($self->get_collection(), $self->{'site'}, $self->{'verbosity'}, $self->{'builddir'}, $self->{'indexdir'}, $self->{'collectdir'}, $self->{'library_url'}, $self->{'library_name'});
        &_make_infodatabases($self, $gsserver, \@builders);

        foreach my $builder (@builders)
        {
            $builder->collect_specific();
        }
    }
    elsif ($mode =~ /^compress_text$/i)
    {
        foreach my $builder (@builders)
        {
            $builder->compress_text($textindex);
        }
    }
    elsif ($mode =~ /^build_index$/i)
    {
        foreach my $builder (@builders)
        {
            $builder->build_indexes($indexname, $indexlevel);
        }
    }
    elsif ($mode =~ /^infodb$/i)
    {
        # This branch needs its own server control object: one created in
        # another branch is not in scope here
        my $gsserver = servercontrol->new($self->get_collection(), $self->{'site'}, $self->{'verbosity'}, $self->{'builddir'}, $self->{'indexdir'}, $self->{'collectdir'}, $self->{'library_url'}, $self->{'library_name'});
        &_make_infodatabases($self, $gsserver, \@builders);
    }
    else
    {
        &gsprintf::gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
        die;
    }
}
735# build_collection()
736
737# @function build_auxiliary_files
738#
sub build_auxiliary_files
{
    # Asks every builder to make its auxiliary files; skipped entirely when
    # the 'debug' option is set.
    #
    # @param $builders_ref reference to the list of builder objects
    my ($self, $builders_ref) = @_;
    my @builders = @{$builders_ref};

    return if ($self->{'debug'});

    foreach my $builder (@builders)
    {
        $builder->make_auxiliary_files();
    }
}
748# build_auxiliary_files()
749
750# @function complete_builders
751#
sub complete_builders
{
    # Finishes a build: deinitialises every builder, copies a cached build
    # back into the build directory, and copies rss-items.rdf from the
    # archives directory into the build directory if it exists.
    #
    # @param $builders_ref reference to the list of builder objects
    my $self = shift(@_);
    my @builders = @{shift(@_)};

    foreach my $builder (@builders)
    {
        $builder->deinit();
    }

    # The build was done in a cache directory: replace the contents of the
    # build directory with it (not done in debug mode)
    if (($self->{'realbuilddir'} ne $self->{'builddir'}) && !$self->{'debug'})
    {
        if ($self->{'verbosity'} >= 1)
        {
            # use the object's output handle; there is no $out in this scope
            &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_back_cached_build}\n");
        }
        &FileUtils::removeFilesRecursive($self->{'builddir'});
        &FileUtils::copyFilesRecursive($self->{'realbuilddir'}, $self->{'builddir'});
    }

    # for RSS support: Need rss-items.rdf file in index folder
    # check if a file called rss-items.rdf exists in archives, then copy it into the building folder
    # so that when building is moved to index, this file will then also be in index as desired
    # NOTE(review): $collection_dir is not used below; the call is kept
    # because util::resolve_collection_dir is not visible from here
    my $collection_dir = &util::resolve_collection_dir($self->{'collectdir'},
                                                       $self->{'collection'},
                                                       $self->{'site'});
    my $rss_items_rdf_file = &FileUtils::filenameConcatenate($self->{'archivedir'}, 'rss-items.rdf');
    # @todo FileUtils
    if (defined $self->{'builddir'} && &FileUtils::directoryExists($self->{'builddir'}) && &FileUtils::fileExists($rss_items_rdf_file))
    {
        if ($self->{'verbosity'} >= 1)
        {
            my $archivedir_tail = "'".basename($self->{'archivedir'})."'";
            my $builddir_tail = "'".basename($self->{'builddir'})."'";

            &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_rss_items_rdf}\n", $archivedir_tail, $builddir_tail);
        }
        &FileUtils::copyFiles($rss_items_rdf_file, $self->{'builddir'});
    }

    if ($self->{'gli'})
    {
        print STDERR "</Build>\n";
    }
}
794# complete_builders()
795
796# @function activate_collection
797#
sub activate_collection
{
    # If buildcol.pl was run with -activate, runs activate.pl for this
    # collection now that building is complete, passing on the relevant
    # options. Prints an error and exits with -1 if activate.pl fails.
    my ($self) = @_;

    # nothing to do unless activation was requested
    return unless ($self->{'activate'});

    my @activate_argv = ();

    # options passed on together with their value, when that value is set
    foreach my $valued_option ('library_url', 'library_name', 'collectdir', 'builddir', 'indexdir', 'site', 'verbosity')
    {
        if ($self->{$valued_option})
        {
            push(@activate_argv, '-' . $valued_option, $self->{$valued_option});
        }
    }
    # plain switches
    foreach my $switch ('removeold', 'keepold', 'incremental')
    {
        if ($self->{$switch})
        {
            push(@activate_argv, '-' . $switch);
        }
    }
    if ($self->{'skipactivation'})
    {
        push(@activate_argv, '-skipactivation', $self->{'skipactivation'});
    }

    # every argument is wrapped in double quotes for the command line
    my $quoted_argv = join(' ', map { '"' . $_ . '"' } @activate_argv);
    my $activatecol_cmd = sprintf('"%s" -S activate.pl %s "%s"',
                                  &util::get_perl_exec(), $quoted_argv, $self->get_collection());
    my $activatecol_status = system($activatecol_cmd)/256;

    if ($activatecol_status != 0)
    {
        print STDERR "Error: Failed to run: $activatecol_cmd\n";
        if ($! ne '')
        {
            print STDERR " $!\n";
        }
        exit(-1);
    }
}
832
833# @function deinit()
834#
sub deinit
{
    # Closes the output and fail log handles, but only those that this
    # object flagged for closing.
    my ($self) = @_;

    close(OUT) if ($self->{'close_out'});
    close(FAILLOG) if ($self->{'close_faillog'});
}
848# deinit()
849
8501;
Note: See TracBrowser for help on using the repository browser.