source: main/trunk/greenstone2/perllib/buildcolutils.pm@ 30518

Last change on this file since 30518 was 30518, checked in by ak19, 5 years ago

Related to previous commit. Need to similarly handle another case of make_infodatabase().

  • Property svn:executable set to *
File size: 27.4 KB
Line 
1##############################################################################
2#
3# buildcolutils.pm -- index and build the collection. The buildtime counterpart
4# of inexport.pl
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package buildcolutils;
29
30#use strict;
31#no strict 'refs';
32
33use File::Basename;
34
35use colcfg;
36use dbutil;
37use util;
38use FileUtils;
39use scriptutil;
40use gsprintf;
41use printusage;
42use parse2;
43
## @method new()
#
# Parses and validates the arguments to the build process and returns a
# blessed buildcolutils object holding the parsed option values.
#
# @note Added true incremental support - John Thompson, DL Consulting Ltd.
# @note There were several bugs regarding using directories other than
#       "import" or "archives" during import and build quashed. - John
#       Thompson, DL Consulting Ltd.
#
# @param $argv    reference to the command line argument list; it is handed
#                 to parse2::parse and the first remaining element is taken
#                 as the collection name
# @param $options hash reference whose 'args' entry describes the accepted
#                 arguments; also passed to the PrintUsage routines
# @param $opt_listall_options accepted for interface compatibility, not
#                 used by this constructor
#
# @return the blessed object. When the 'xml' option is set the usage is
#         printed as XML and the object is returned early, before any
#         collection name or output handle has been resolved.
#
# Dies (with a bare newline message) when argument parsing fails, when -h
# is the first remaining argument, or when there is not exactly one
# argument left over after parsing.
#
sub new
{
    my $class = shift(@_);
    my ($argv, $options, $opt_listall_options) = @_;

    my $self = {'builddir' => undef,
                'buildtype' => undef,
                'close_faillog' => 0,
                'close_out' => 0,
                'mode' => '',
                'orthogonalbuildtypes' => undef,
                'realbuilddir' => undef,
                'textindex' => '',
                'xml' => 0
               };

    # general options available to all plugins
    my $arguments = $options->{'args'};
    my $intArgLeftinAfterParsing = &parse2::parse($argv, $arguments, $self, "allow_extra_options");
    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1)
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
        print STDERR "Something went wrong during parsing the arguments. Scroll up for details.\n";
        die "\n";
    }

    # If a language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($self->{'language'} && $self->{'language'} =~ /\S/)
    {
        &gsprintf::load_language_specific_resource_bundle($self->{'language'});
    }

    # Do we need 'listall' support in buildcol? If so, copy code from inexport
    # later [jmt12]

    # With -xml only the usage is wanted (as XML), so there is no need for a
    # collection name or an output handle: print it and return early.
    if ($self->{'xml'})
    {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return bless($self, $class);
    }

    # the gli wants strings to be in UTF-8
    # (the parsed option lives in $self; there is no $gli variable in scope
    # in this package, so testing a bare $gli could never be true)
    if ($self->{'gli'})
    {
        &gsprintf::output_strings_in_UTF8;
    }

    # If the user specified -h, then we output the usage
    if (@$argv && $argv->[0] =~ /^\-+h/)
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    if ($intArgLeftinAfterParsing != 1)
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
        print STDERR "There should be one argument left after parsing the script args: the collection name.\n";
        die "\n";
    }

    my $out = $self->{'out'};
    if ($out !~ /^(STDERR|STDOUT)$/i)
    {
        # three-argument open so that the file name can never be
        # interpreted as an open mode or a pipe
        open(OUT, '>', $out) || (&gsprintf::gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die);
        # from here on the handle is referred to by its qualified name
        $out = "buildcolutils::OUT";
        $self->{'close_out'} = 1;
    }
    $out->autoflush(1);
    $self->{'out'} = $out;

    # @ARGV should be only one item, the name of the collection
    $self->{'collection'} = shift(@{$argv});

    return bless($self, $class);
}
# new()
146
147# newCGI()?
148
# @function get_collection
# Accessor for the collection name stored under the 'collection' key.
#
# @return the stored collection name (undef if none has been stored)
sub get_collection
{
    my ($self) = @_;
    my $collection_name = $self->{'collection'};
    return $collection_name;
}
# get_collection()
157
# @function read_collection_cfg
# Selects the collection, records the Greenstone version in use, adds the
# collection's perllib directories to the include path, opens the fail log
# for appending and reads the collection configuration file.
#
# Sets in $self: 'gs_version', 'faillog' (qualified handle name),
# 'faillogname' (path of the fail log file) and 'close_faillog'.
#
# @param $collection the collection name
# @param $options    accepted for interface compatibility, not used here
#
# @return a list of (config file name, collection configuration) as
#         returned by the colcfg routines
#
# Dies if the collection cannot be selected or the fail log cannot be
# opened.
sub read_collection_cfg
{
    my $self = shift(@_);
    my ($collection, $options) = @_;

    my $collectdir = $self->{'collectdir'};
    my $site = $self->{'site'};
    my $out = $self->{'out'};

    # get and check the collection
    if (($collection = &colcfg::use_collection($site, $collection, $collectdir)) eq "")
    {
        #&PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
        die "\n";
    }

    # set gs_version 2/3: a non-empty site means gs3
    $self->{'gs_version'} = ((defined $site) && ($site ne "")) ? "3" : "2";

    # add collection's perllib dir into include path in case we have collection
    # specific modules
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib'));
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'classify'));
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'plugins'));

    # check that we can open the faillog
    my $faillogname = $self->{'faillog'};
    if (!defined $faillogname || $faillogname eq "")
    {
        $faillogname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    # (three-argument open so the path is never parsed for a mode or pipe)
    open(FAILLOG, '>>', $faillogname) || (&gsprintf::gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillogname) && die);
    my $faillog = 'buildcolutils::FAILLOG';
    $faillog->autoflush(1);
    $self->{'faillog'} = $faillog;
    # keep the file path separately from the handle name; previously this
    # was assigned after the variable had been overwritten with the handle
    # name, so the path was lost
    $self->{'faillogname'} = $faillogname;
    $self->{'close_faillog'} = 1;

    # Read in the collection configuration file.
    my $gs_mode = "gs" . $self->{'gs_version'}; #gs2 or gs3
    my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
    my $collect_cfg = &colcfg::read_collection_cfg($config_filename, $gs_mode);

    return ($config_filename, $collect_cfg);
}
# read_collection_cfg()
214
# @function set_collection_options
# This function copies across values for arguments from the collection
# configuration file if they are not already provided by the user, then
# sets reasonable defaults for any required arguments that remain without
# a value. All results are stored in $self; the 'infodbtype' entry of
# $collectcfg is also defaulted in place.
#
# @param $collectcfg hash reference holding the collection configuration
sub set_collection_options
{
    my $self = shift @_;
    my ($collectcfg) = @_;

    # If the infodbtype value wasn't defined in the collect.cfg file, use the default
    if (!defined($collectcfg->{'infodbtype'}))
    {
        $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
    }
    # - just so I don't have to pass collectcfg around as well
    $self->{'infodbtype'} = $collectcfg->{'infodbtype'};

    if ($self->{'verbosity'} !~ /\d+/)
    {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/)
        {
            $self->{'verbosity'} = $collectcfg->{'verbosity'};
        }
        else
        {
            $self->{'verbosity'} = 2; # the default
        }
    }

    # we use searchtype for determining buildtype, but for old versions, use buildtype
    if (defined $collectcfg->{'buildtype'})
    {
        $self->{'buildtype'} = $collectcfg->{'buildtype'};
    }
    elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'})
    {
        $self->{'buildtype'} = "mgpp";
    }
    else
    {
        $self->{'buildtype'} = "mg"; #mg is the default
    }

    if ($self->{'buildtype'} eq "mgpp" && defined $collectcfg->{'textcompress'})
    {
        $self->{'textindex'} = $collectcfg->{'textcompress'};
    }

    # is it okay to always clobber or possible remain undefined? [jmt12]
    if (defined $collectcfg->{'orthogonalbuildtypes'})
    {
        $self->{'orthogonalbuildtypes'} = $collectcfg->{'orthogonalbuildtypes'};
    }

    # - resolve (and possibly set to default) archivedir
    if (defined $collectcfg->{'archivedir'} && $self->{'archivedir'} eq "")
    {
        $self->{'archivedir'} = $collectcfg->{'archivedir'};
    }
    # Modified so that the archivedir, if provided as an argument, is made
    # absolute if it isn't already
    if ($self->{'archivedir'} eq "")
    {
        $self->{'archivedir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "archives");
    }
    else
    {
        $self->{'archivedir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'archivedir'});
    }
    # End Mod
    $self->{'archivedir'} = &FileUtils::sanitizePath($self->{'archivedir'});

    # - resolve (and possibly set to default) builddir
    if (defined $collectcfg->{'builddir'} && $self->{'builddir'} eq "")
    {
        $self->{'builddir'} = $collectcfg->{'builddir'};
    }
    if ($self->{'builddir'} eq "")
    {
        $self->{'builddir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'building');
        # Read the option and the output handle from $self: at this point
        # no lexical $incremental or $out exists in this sub, so bare
        # variables would silently refer to unset package globals.
        if ($self->{'incremental'})
        {
            &gsprintf::gsprintf($self->{'out'}, "{buildcol.incremental_default_builddir}\n");
        }
    }
    else
    {
        # make absolute if not already
        $self->{'builddir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'builddir'});
    }
    $self->{'builddir'} = &FileUtils::sanitizePath($self->{'builddir'});

    if (defined $collectcfg->{'cachedir'} && $self->{'cachedir'} eq "")
    {
        $self->{'cachedir'} = $collectcfg->{'cachedir'};
    }

    if ($self->{'maxdocs'} !~ /\-?\d+/)
    {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/)
        {
            $self->{'maxdocs'} = $collectcfg->{'maxdocs'};
        }
        else
        {
            $self->{'maxdocs'} = -1; # the default
        }
    }

    # always clobbers? [jmt12]
    if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/)
    {
        $self->{'maxnumeric'} = $collectcfg->{'maxnumeric'};
    }
    # values outside 4..512 fall back to 4
    if ($self->{'maxnumeric'} < 4 || $self->{'maxnumeric'} > 512)
    {
        $self->{'maxnumeric'} = 4;
    }

    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i)
    {
        $self->{'debug'} = 1;
    }

    if ($self->{'mode'} !~ /^(all|compress_text|build_index|infodb|extra)$/)
    {
        if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb|extra)$/)
        {
            $self->{'mode'} = $collectcfg->{'mode'};
        }
        else
        {
            $self->{'mode'} = "all"; # the default
        }
    }

    # Presumably 'index' from the collect.cfg still works [jmt12]
    # - 'index' from the command line doesn't make it through parsing so I
    #   renamed this option 'indexname' [jmt12]
    # 'index' is consulted first, then 'indexname'; each only applies while
    # no index name has been set yet.
    foreach my $cfg_key ('index', 'indexname')
    {
        if (defined $collectcfg->{$cfg_key} && $self->{'indexname'} eq "")
        {
            $self->{'indexname'} = $collectcfg->{$cfg_key};
        }
    }
    # - we may also define the index level to build now [jmt12]
    if (defined $collectcfg->{'indexlevel'} && $self->{'indexlevel'} eq "")
    {
        $self->{'indexlevel'} = $collectcfg->{'indexlevel'};
    }

    # Boolean switches: a "true" in the configuration turns the switch on,
    # but only when it is not already on.
    foreach my $flag ('no_text', 'no_strip_html', 'store_metadata_coverage', 'remove_empty_classifications')
    {
        if (defined $collectcfg->{$flag} && $self->{$flag} == 0)
        {
            if ($collectcfg->{$flag} =~ /^true$/i)
            {
                $self->{$flag} = 1;
            }
        }
    }

    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i)
    {
        $self->{'gli'} = 1;
    }
    if (!defined $self->{'gli'})
    {
        $self->{'gli'} = 0;
    }

    # Both options take the configuration value when blank, and anything
    # other than the three recognised values becomes 'never'.
    foreach my $sections_opt ('sections_index_document_metadata', 'sections_sort_on_document_metadata')
    {
        if ($self->{$sections_opt} !~ /\S/ && defined $collectcfg->{$sections_opt})
        {
            $self->{$sections_opt} = $collectcfg->{$sections_opt};
        }
        if ($self->{$sections_opt} !~ /^(never|always|unless_section_metadata_exists)$/)
        {
            $self->{$sections_opt} = 'never';
        }
    }

    my ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},
                                                   $self->{'incremental'}, 'building',
                                                   $collectcfg);
    $self->{'removeold'} = $removeold;
    $self->{'keepold'} = $keepold;
    $self->{'incremental'} = $incremental;
    $self->{'incremental_mode'} = $incremental_mode;

    # New argument to track whether build is incremental
    if (!defined $self->{'incremental'})
    {
        $self->{'incremental'} = 0;
    }

    #set the text index
    if (($self->{'buildtype'} eq 'mgpp') || ($self->{'buildtype'} eq 'lucene') || ($self->{'buildtype'} eq 'solr'))
    {
        if ($self->{'textindex'} eq '')
        {
            $self->{'textindex'} = 'text';
        }
    }
    else
    {
        $self->{'textindex'} = 'section:text';
    }
}
# set_collection_options()
461
# @function prepare_builders
# Works out the real archive and build directories (taking the optional
# cache directory into account), decides which builder class(es) to use,
# loads and constructs them, and runs their initialisation phase.
#
# Sets 'realarchivedir' and 'realbuilddir' in $self.
#
# @param $config_filename accepted for interface compatibility, not used here
# @param $collectcfg      accepted for interface compatibility, not used here
#
# @return reference to the array of initialised builder objects: the main
#         builder first (omitted when the mode is "extra"), followed by
#         one builder per orthogonal build type
sub prepare_builders
{
    my $self = shift @_;
    my ($config_filename, $collectcfg) = @_;

    my $archivedir = $self->{'archivedir'};
    my $builddir = $self->{'builddir'};
    my $buildtype = $self->{'buildtype'};
    my $cachedir = $self->{'cachedir'};
    my $collection = $self->{'collection'};
    my $debug = $self->{'debug'};
    my $faillog = $self->{'faillog'};
    my $gli = $self->{'gli'};
    my $incremental = $self->{'incremental'};
    my $incremental_mode = $self->{'incremental_mode'};
    my $keepold = $self->{'keepold'};
    my $maxdocs = $self->{'maxdocs'};
    my $maxnumeric = $self->{'maxnumeric'};
    my $no_strip_html = $self->{'no_strip_html'};
    my $no_text = $self->{'no_text'};
    my $orthogonalbuildtypes = $self->{'orthogonalbuildtypes'};
    my $out = $self->{'out'};
    my $remove_empty_classifications = $self->{'remove_empty_classifications'};
    my $sections_index_document_metadata = $self->{'sections_index_document_metadata'};
    my $sections_sort_on_document_metadata = $self->{'sections_sort_on_document_metadata'};
    my $site = $self->{'site'};
    my $store_metadata_coverage = $self->{'store_metadata_coverage'};
    my $verbosity = $self->{'verbosity'};

    if ($gli)
    {
        print STDERR "<Build>\n";
    }

    my ($realarchivedir, $realbuilddir);
    # update the archive cache if needed
    if ($cachedir)
    {
        if ($verbosity >= 1)
        {
            &gsprintf::gsprintf($out, "{buildcol.updating_archive_cache}\n")
        }

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E so that the collection name is matched literally rather
        # than being interpreted as a regular expression
        if ($cachedir !~ /collect[\/\\]\Q$collection\E/)
        {
            $cachedir = &FileUtils::filenameConcatenate($cachedir, 'collect', $collection);
        }

        $realarchivedir = &FileUtils::filenameConcatenate($cachedir, 'archives');
        $realbuilddir = &FileUtils::filenameConcatenate($cachedir, 'building');
        &FileUtils::makeAllDirectories($realarchivedir);
        &FileUtils::makeAllDirectories($realbuilddir);
        &FileUtils::synchronizeDirectory($archivedir, $realarchivedir, $verbosity);
    }
    else
    {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }
    $self->{'realarchivedir'} = $realarchivedir;
    $self->{'realbuilddir'} = $realbuilddir;

    # build it in realbuilddir
    &FileUtils::makeAllDirectories($realbuilddir);

    my ($buildertype, $builderdir);
    # if a builder class has been created for this collection, use it
    # otherwise, use the builder matching the build type
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildertype = "custombuilder";
    }
    elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "custombuilder";
    }
    elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = $collection . 'builder';
    }
    else
    {
        $builderdir = undef;
        if ($buildtype ne '')
        {
            # caters for extension-based build types, such as 'solr'
            $buildertype = $buildtype . 'builder';
        }
        else
        {
            # Default to mgpp
            $buildertype = 'mgppbuilder';
        }
    }

    # check for extension specific builders
    # (that will then be run after main builder.pm)
    my @builderdir_list = ($builderdir);
    my @buildertype_list = ($buildertype);

    my $mode = $self->{'mode'};
    if ($mode eq "extra")
    {
        # knock out the main builder type, by resetting the lists to be empty
        @builderdir_list = ();
        @buildertype_list = ();
    }

    if (defined $orthogonalbuildtypes)
    {
        foreach my $obt (@$orthogonalbuildtypes)
        {
            push(@builderdir_list, undef); # rely on @INC to find it
            push(@buildertype_list, $obt . "Builder");
        }
    }

    # Set up array of the main builder.pm, followed by any ones
    # from the extension folders
    my @builders = ();
    for my $i (0 .. $#buildertype_list)
    {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builderdir = $builderdir_list[$i];

        if ((defined $this_builderdir) && ($this_builderdir ne ""))
        {
            require "$this_builderdir/$this_buildertype.pm";
        }
        else
        {
            require "$this_buildertype.pm";
        }

        # Call the constructor as a class method on the class name held in
        # the string. This avoids a string eval and the indirect object
        # syntax ("new Class(...)"), whose parsing is ambiguous in a
        # package that defines its own new().
        my $this_builder = $this_buildertype->new($site, $collection,
                                                  $realarchivedir, $realbuilddir, $verbosity,
                                                  $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                                                  $remove_empty_classifications,
                                                  $out, $no_text, $faillog, $gli);

        push(@builders, $this_builder);
    }

    # Init phase for builders
    for my $i (0 .. $#builders)
    {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builder = $builders[$i];

        $this_builder->init();
        $this_builder->set_maxnumeric($maxnumeric);

        if (($this_buildertype eq "mgppbuilder") && $no_strip_html)
        {
            $this_builder->set_strip_html(0);
        }

        if ($sections_index_document_metadata ne "never")
        {
            $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
        }
        if (($this_buildertype eq "lucenebuilder" || $this_buildertype eq "solrbuilder") && $sections_sort_on_document_metadata ne "never")
        {
            $this_builder->set_sections_sort_on_document_metadata($sections_sort_on_document_metadata);
        }

        if ($store_metadata_coverage)
        {
            $this_builder->set_store_metadata_coverage(1);
        }
    }
    return \@builders;
}
650
# @function build_collection
# Runs the build passes selected by the 'mode' option over every builder.
#
#   all / extra   : compress_text, build_indexes, the info database pass
#                   and collect_specific, each pass being run over all
#                   builders before the next pass starts
#   compress_text : compress_text only
#   build_index   : build_indexes only
#   infodb        : the info database pass only
#
# Any other mode reports {buildcol.unknown_mode} and dies.
#
# @param $builders_ref reference to the array of builder objects
sub build_collection
{
    my $self = shift(@_);
    my @builders = @{shift(@_)};

    my $indexlevel = $self->{'indexlevel'};
    my $indexname = $self->{'indexname'};
    my $mode = $self->{'mode'};
    my $textindex = $self->{'textindex'};

    # Each builder is visited through a lexical loop variable, so nothing
    # a builder method does to the global $_ can alter the entries of
    # @builders.
    if ($mode =~ /^(all|extra)$/i)
    {
        foreach my $builder (@builders)
        {
            $builder->compress_text($textindex);
        }
        # - we pass the required indexname and indexlevel (if specified) to the
        #   processor [jmt12]
        foreach my $builder (@builders)
        {
            $builder->build_indexes($indexname, $indexlevel);
        }
        $self->_make_infodatabases(\@builders);
        foreach my $builder (@builders)
        {
            $builder->collect_specific();
        }
    }
    elsif ($mode =~ /^compress_text$/i)
    {
        foreach my $builder (@builders)
        {
            $builder->compress_text($textindex);
        }
    }
    elsif ($mode =~ /^build_index$/i)
    {
        foreach my $builder (@builders)
        {
            $builder->build_indexes($indexname, $indexlevel);
        }
    }
    elsif ($mode =~ /^infodb$/i)
    {
        $self->_make_infodatabases(\@builders);
    }
    else
    {
        (&gsprintf::gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode) && die);
    }
}
# build_collection()

# @function _make_infodatabases
# Private helper for build_collection: runs make_infodatabase() on every
# builder that reports support for it.
#
# when incrementally rebuilding a collection using any db that doesn't support concurrent
# read and write (e.g. gdbm), need to deactivate the collection before make_infodatabase()
#
# @param $builders reference to the array of builder objects
sub _make_infodatabases
{
    my ($self, $builders) = @_;

    foreach my $builder (@$builders)
    {
        next unless $builder->supports_make_infodatabase();

        my $infodbtype = $builder->{'infodbtype'};
        my $dbSupportsConcurrentRW = &dbutil::supportsConcurrentReadAndWrite($infodbtype);
        if (!$dbSupportsConcurrentRW)
        {
            $self->activate_collection("just_deactivate");
        }
        $builder->make_infodatabase();
    }
}
# _make_infodatabases()
732
# @function build_auxiliary_files
# Asks every builder to make its auxiliary files. Nothing is done when the
# 'debug' option is set.
#
# @param $builders_ref reference to the array of builder objects
sub build_auxiliary_files
{
    my ($self, $builders_ref) = @_;
    my @builder_list = @{$builders_ref};

    unless ($self->{'debug'})
    {
        # lexical loop variable: the builders' methods cannot disturb
        # the list entries through $_
        foreach my $builder (@builder_list)
        {
            $builder->make_auxiliary_files();
        }
    }
}
# build_auxiliary_files()
745
# @function complete_builders
# Finishes a build: de-initialises every builder, copies a cached build
# back into the build directory when a separate real build directory was
# used (skipped in debug mode), and copies rss-items.rdf from the archive
# directory into the build directory when that file exists.
#
# @param $builders_ref reference to the array of builder objects
sub complete_builders
{
    my $self = shift(@_);
    my @builders = @{shift(@_)};

    # lexical loop variable: the builders' methods cannot disturb the
    # list entries through $_
    foreach my $builder (@builders)
    {
        $builder->deinit();
    }

    if (($self->{'realbuilddir'} ne $self->{'builddir'}) && !$self->{'debug'})
    {
        if ($self->{'verbosity'} >= 1)
        {
            # use the output handle stored in $self; no $out variable is
            # declared in this sub
            &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_back_cached_build}\n");
        }
        &FileUtils::removeFilesRecursive($self->{'builddir'});
        &FileUtils::copyFilesRecursive($self->{'realbuilddir'}, $self->{'builddir'});
    }

    # for RSS support: Need rss-items.rdf file in index folder
    # check if a file called rss-items.rdf exists in archives, then copy it into the building folder
    # so that when building is moved to index, this file will then also be in index as desired
    # NOTE(review): $collection_dir is not used below; the call is kept in
    # case resolve_collection_dir has side effects - confirm and remove.
    my $collection_dir = &util::resolve_collection_dir($self->{'collectdir'},
                                                       $self->{'collection'},
                                                       $self->{'site'});
    my $rss_items_rdf_file = &FileUtils::filenameConcatenate($self->{'archivedir'}, 'rss-items.rdf');
    # @todo FileUtils
    if (defined $self->{'builddir'} && &FileUtils::directoryExists($self->{'builddir'}) && &FileUtils::fileExists($rss_items_rdf_file))
    {
        if ($self->{'verbosity'} >= 1)
        {
            my $archivedir_tail = "'" . basename($self->{'archivedir'}) . "'";
            my $builddir_tail = "'" . basename($self->{'builddir'}) . "'";

            &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_rss_items_rdf}\n", $archivedir_tail, $builddir_tail);
        }
        &FileUtils::copyFiles($rss_items_rdf_file, $self->{'builddir'});
    }

    if ($self->{'gli'})
    {
        print STDERR "</Build>\n";
    }
}
# complete_builders()
791
# @function activate_collection
# Runs activate.pl for the collection, but only when the 'activate' option
# is set; otherwise this is a no-op.
#
# @param $activation_cmd optional; one of "just_deactivate",
#        "just_activate" or "just_deactivate_and_activate". It adds the
#        matching -justdeactivate / -justactivate switches to the
#        activate.pl command line (the 'justactivate' and 'justdeactivate'
#        options in $self have the same effect).
#
# On failure of the activate.pl command an error is printed to STDERR and
# the process exits with -1.
sub activate_collection
{
    my $self = shift(@_);
    my $activation_cmd = shift(@_);

    my ($just_activate, $just_deactivate);
    if (defined $activation_cmd)
    {
        $just_deactivate = 1 if ($activation_cmd eq "just_deactivate");
        $just_activate = 1 if ($activation_cmd eq "just_activate");
        if ($activation_cmd eq "just_deactivate_and_activate")
        {
            $just_deactivate = 1;
            $just_activate = 1;
        }
    }

    # if buildcol.pl was run with -activate, need to run activate.pl
    # now that building's complete
    if ($self->{'activate'})
    {
        my @activate_argv = ();
        push(@activate_argv, '-collectdir', $self->{'collectdir'}) if ($self->{'collectdir'});
        push(@activate_argv, '-builddir', $self->{'builddir'}) if ($self->{'builddir'});
        push(@activate_argv, '-site', $self->{'site'}) if ($self->{'site'});
        push(@activate_argv, '-verbosity', $self->{'verbosity'}) if ($self->{'verbosity'});
        push(@activate_argv, '-removeold') if ($self->{'removeold'});
        push(@activate_argv, '-keepold') if ($self->{'keepold'});
        push(@activate_argv, '-incremental') if ($self->{'incremental'});
        push(@activate_argv, '-justactivate') if ($just_activate || $self->{'justactivate'});
        push(@activate_argv, '-justdeactivate') if ($just_deactivate || $self->{'justdeactivate'});

        my $quoted_argv = join(' ', map { "\"$_\"" } @activate_argv);
        my $activatecol_cmd = '"' . &util::get_perl_exec() . '" -S activate.pl ' . $quoted_argv . ' "' . $self->get_collection() . '"';

        # system() returns the raw wait status, or -1 if the command could
        # not be started. $! is only meaningful in the -1 case, so capture
        # it straight away and report the other cases from the status.
        my $wait_status = system($activatecol_cmd);
        my $launch_error = "$!";

        if ($wait_status != 0)
        {
            print STDERR "Error: Failed to run: $activatecol_cmd\n";
            if ($wait_status == -1)
            {
                print STDERR " $launch_error\n";
            }
            elsif ($wait_status & 127)
            {
                print STDERR " terminated by signal " . ($wait_status & 127) . "\n";
            }
            else
            {
                print STDERR " exit status " . ($wait_status >> 8) . "\n";
            }
            exit(-1);
        }
    }
}
835
# @function deinit()
# Closes the OUT and FAILLOG handles, each only when the corresponding
# 'close_out' / 'close_faillog' flag is set in $self.
sub deinit
{
    my ($self) = @_;

    close OUT if ($self->{'close_out'});
    close FAILLOG if ($self->{'close_faillog'});
}
# deinit()
852
8531;
Note: See TracBrowser for help on using the repository browser.