source: main/trunk/greenstone2/perllib/buildcolutils.pm@ 28801

Last change on this file since 28801 was 28801, checked in by ak19, 10 years ago

Added a new mode to buildcol.pl called 'extra'. This mode restricts the build so that the archives content is sent to, and processed by, the orthogonal indexes only.

  • Property svn:executable set to *
File size: 25.4 KB
Line 
1###############################################################################
2#
3# buildcolutils.pm -- index and build the collection. The buildtime counterpart
4# of inexport.pl
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package buildcolutils;
29
30#use strict;
31#no strict 'refs';
32
33use File::Basename;
34
35use colcfg;
36use dbutil;
37use util;
38use FileUtils;
39use scriptutil;
40use gsprintf;
41use printusage;
42use parse2;
43
## @method new()
#
# Parses and validates the arguments to the build process and returns a
# blessed object holding the parsed option values.
#
# @note Added true incremental support - John Thompson, DL Consulting Ltd.
# @note There were several bugs regarding using directories other than
#       "import" or "archives" during import and build quashed. - John
#       Thompson, DL Consulting Ltd.
#
# @param $argv    Reference to the argument list. It is handed to
#                 parse2::parse(); afterwards exactly one element (the
#                 collection name) must remain, which is shifted off and
#                 stored under 'collection'.
# @param $options Hash reference describing the script's options. Its 'args'
#                 entry is given to the parser, and the whole structure is
#                 given to the usage printers.
# @param $opt_listall_options Accepted but not referenced in this sub.
#
# @return The blessed object. When -xml was given the usage is printed as XML
#         and the object is returned early, before the collection name and
#         the output handle have been set up.
#
# Dies (after printing the text usage) when the parser reports -1, when the
# number of leftover arguments is not exactly one, or when the first element
# of @ARGV looks like -h. Also dies when the requested output file cannot be
# opened.
#
# The 'incremental' option is only recorded here; nothing in this sub acts
# on it.
#
sub new
{
    my $class = shift(@_);
    my ($argv, $options, $opt_listall_options) = @_;

    my $self = {'builddir'             => undef,
                'buildtype'            => undef,
                'close_faillog'        => 0,
                'close_out'            => 0,
                'mode'                 => '',
                'orthogonalbuildtypes' => undef,
                'realbuilddir'         => undef,
                'textindex'            => '',
                'xml'                  => 0
               };

    # general options available to all plugins
    my $arguments = $options->{'args'};
    my $intArgLeftinAfterParsing = &parse2::parse($argv, $arguments, $self, "allow_extra_options");
    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1)
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($self->{'language'} && $self->{'language'} =~ /\S/)
    {
        &gsprintf::load_language_specific_resource_bundle($self->{'language'});
    }

    # Do we need 'listall' support in buildcol? If so, copy code from inexport
    # later [jmt12]

    # -xml only asks for a description of the options: print it and return
    # straight away, since no collection name is required in that case
    if ($self->{'xml'})
    {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return bless($self, $class);
    }

    # the gli wants strings to be in UTF-8
    # - the flag lives in the parsed options; there is no $gli variable in
    #   scope in this sub
    if ($self->{'gli'})
    {
        # explicit empty argument list so our own @_ is not passed along
        &gsprintf::output_strings_in_UTF8();
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/))
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    my $out = $self->{'out'};
    if ($out !~ /^(STDERR|STDOUT)$/i)
    {
        # three-argument open: the file name is never interpreted as a mode
        if (!open(OUT, '>', $out))
        {
            # always die, whatever the message printer returns
            &gsprintf::gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        }
        # from here on the handle is referred to by its package-qualified name
        $out = "buildcolutils::OUT";
        $self->{'close_out'} = 1;
    }
    $out->autoflush(1);
    $self->{'out'} = $out;

    # @ARGV should be only one item, the name of the collection
    $self->{'collection'} = shift(@{$argv});

    return bless($self, $class);
}
138# new()
139
140# newCGI()?
141
# @function get_collection
#
# Accessor for the collection name held under the object's 'collection' key.
#
# @return the stored value (undef when the key has not been set)
#
sub get_collection
{
    my ($self) = @_;
    my $collection_name = $self->{'collection'};
    return $collection_name;
}
149# get_collection()
150
# @function read_collection_cfg
#
# Selects the collection, opens the fail log for appending and reads the
# collection configuration file.
#
# @param $collection Name of the collection; passed to
#                    colcfg::use_collection() together with the object's
#                    'site' and 'collectdir' values.
# @param $options    Option description structure, used only to print the
#                    usage text when the collection cannot be selected.
#
# @return a two element list: the configuration file name and the value
#         returned by colcfg::read_collection_cfg().
#
# Sets on the object: 'gs_version' ("3" when a site is given, otherwise "2"),
# 'faillog' (the package-qualified handle name), 'faillogname' (the path of
# the fail log file) and 'close_faillog'.
#
# Dies when use_collection() returns an empty string or when the fail log
# cannot be opened.
#
sub read_collection_cfg
{
    my $self = shift(@_);
    my ($collection, $options) = @_;

    my $collectdir = $self->{'collectdir'};
    my $site = $self->{'site'};
    my $out = $self->{'out'};

    # get and check the collection
    if (($collection = &colcfg::use_collection($site, $collection, $collectdir)) eq "")
    {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # set gs_version 2/3
    $self->{'gs_version'} = "2";
    if ((defined $site) && ($site ne ""))
    {
        # gs3
        $self->{'gs_version'} = "3";
    }

    # add collection's perllib dir into include path in case we have collection
    # specific modules
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib'));
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'classify'));
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'plugins'));

    # check that we can open the faillog
    my $faillogname = $self->{'faillog'};
    if (!defined $faillogname || $faillogname eq "")
    {
        $faillogname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    if (!open(FAILLOG, '>>', $faillogname))
    {
        # always die, whatever the message printer returns
        &gsprintf::gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillogname);
        die;
    }
    # the handle is passed around by its package-qualified name
    my $faillog = 'buildcolutils::FAILLOG';
    $faillog->autoflush(1);
    $self->{'faillog'} = $faillog;
    # keep the file path separately; it must not be replaced by the handle name
    $self->{'faillogname'} = $faillogname;
    $self->{'close_faillog'} = 1;

    # Read in the collection configuration file.
    my $gs_mode = "gs".$self->{'gs_version'}; #gs2 or gs3
    my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
    my $collect_cfg = &colcfg::read_collection_cfg($config_filename, $gs_mode);

    return ($config_filename, $collect_cfg);
}
206# read_collection_cfg()
207
# @function set_collection_options
# This function copies across values for arguments from the collection
# configuration file if they are not already provided by the user, then
# sets reasonable defaults for any required arguments that remain without
# a value.
#
# @param $collectcfg Hash reference holding the collection configuration.
#                    Its 'infodbtype' entry is filled in with the default
#                    when it is missing; otherwise it is only read.
#
# All results are stored on the object ($self); every value this sub needs
# (archivedir, incremental, out, ...) is likewise read from the object.
#
sub set_collection_options
{
    my $self = shift @_;
    my ($collectcfg) = @_;

    # If the infodbtype value wasn't defined in the collect.cfg file, use the default
    if (!defined($collectcfg->{'infodbtype'}))
    {
        $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
    }
    # - just so I don't have to pass collectcfg around as well
    $self->{'infodbtype'} = $collectcfg->{'infodbtype'};

    if ($self->{'verbosity'} !~ /\d+/)
    {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/)
        {
            $self->{'verbosity'} = $collectcfg->{'verbosity'};
        }
        else
        {
            $self->{'verbosity'} = 2; # the default
        }
    }

    # we use searchtype for determining buildtype, but for old versions, use buildtype
    if (defined $collectcfg->{'buildtype'})
    {
        $self->{'buildtype'} = $collectcfg->{'buildtype'};
    }
    elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'})
    {
        $self->{'buildtype'} = "mgpp";
    }
    else
    {
        $self->{'buildtype'} = "mg"; #mg is the default
    }

    if ($self->{'buildtype'} eq "mgpp" && defined $collectcfg->{'textcompress'})
    {
        $self->{'textindex'} = $collectcfg->{'textcompress'};
    }

    # is it okay to always clobber or possible remain undefined? [jmt12]
    if (defined $collectcfg->{'orthogonalbuildtypes'})
    {
        $self->{'orthogonalbuildtypes'} = $collectcfg->{'orthogonalbuildtypes'};
    }

    # - resolve (and possibly set to default) archivedir
    if (defined $collectcfg->{'archivedir'} && $self->{'archivedir'} eq "")
    {
        $self->{'archivedir'} = $collectcfg->{'archivedir'};
    }
    # Modified so that the archivedir, if provided as an argument, is made
    # absolute if it isn't already
    if ($self->{'archivedir'} eq "")
    {
        $self->{'archivedir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "archives");
    }
    else
    {
        # the value to resolve is the one stored on the object
        $self->{'archivedir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'archivedir'});
    }
    # End Mod
    $self->{'archivedir'} = &FileUtils::sanitizePath($self->{'archivedir'});

    # - resolve (and possibly set to default) builddir
    if (defined $collectcfg->{'builddir'} && $self->{'builddir'} eq "")
    {
        $self->{'builddir'} = $collectcfg->{'builddir'};
    }
    if ($self->{'builddir'} eq "")
    {
        $self->{'builddir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'building');
        # tell the user when an incremental build falls back to the default
        # build directory; both the flag and the output handle come from
        # the object
        if ($self->{'incremental'})
        {
            &gsprintf::gsprintf($self->{'out'}, "{buildcol.incremental_default_builddir}\n");
        }
    }
    # - why don't we make builddir absolute similar to archivedir?
    $self->{'builddir'} = &FileUtils::sanitizePath($self->{'builddir'});

    if (defined $collectcfg->{'cachedir'} && $self->{'cachedir'} eq "")
    {
        $self->{'cachedir'} = $collectcfg->{'cachedir'};
    }

    if ($self->{'maxdocs'} !~ /\-?\d+/)
    {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/)
        {
            $self->{'maxdocs'} = $collectcfg->{'maxdocs'};
        }
        else
        {
            $self->{'maxdocs'} = -1; # the default
        }
    }

    # always clobbers? [jmt12]
    if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/)
    {
        $self->{'maxnumeric'} = $collectcfg->{'maxnumeric'};
    }
    # values outside 4..512 are replaced by 4
    if ($self->{'maxnumeric'} < 4 || $self->{'maxnumeric'} > 512)
    {
        $self->{'maxnumeric'} = 4;
    }

    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i)
    {
        $self->{'debug'} = 1;
    }

    # the same set of mode names is accepted from the user and from collect.cfg
    my $valid_mode = qr/^(all|compress_text|build_index|infodb|extra)$/;
    if ($self->{'mode'} !~ $valid_mode)
    {
        if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ $valid_mode)
        {
            $self->{'mode'} = $collectcfg->{'mode'};
        }
        else
        {
            $self->{'mode'} = "all"; # the default
        }
    }

    # Presumably 'index' from the collect.cfg still works [jmt12]
    if (defined $collectcfg->{'index'} && $self->{'indexname'} eq "")
    {
        $self->{'indexname'} = $collectcfg->{'index'};
    }
    # - 'index' from the command line doesn't make it through parsing so I
    #   renamed this option 'indexname' [jmt12]
    if (defined $collectcfg->{'indexname'} && $self->{'indexname'} eq "")
    {
        $self->{'indexname'} = $collectcfg->{'indexname'};
    }
    # - we may also define the index level to build now [jmt12]
    if (defined $collectcfg->{'indexlevel'} && $self->{'indexlevel'} eq "")
    {
        $self->{'indexlevel'} = $collectcfg->{'indexlevel'};
    }

    # Boolean switches: a "true" in collect.cfg turns the switch on when the
    # object's value is still 0 (or unset)
    foreach my $switch ('no_text', 'no_strip_html', 'store_metadata_coverage', 'remove_empty_classifications')
    {
        if (defined $collectcfg->{$switch} && $self->{$switch} == 0)
        {
            if ($collectcfg->{$switch} =~ /^true$/i)
            {
                $self->{$switch} = 1;
            }
        }
    }

    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i)
    {
        $self->{'gli'} = 1;
    }
    if (!defined $self->{'gli'})
    {
        $self->{'gli'} = 0;
    }

    # Section metadata policies: take the collect.cfg value when the user gave
    # none, then fall back to 'never' for anything that is not a known policy
    foreach my $policy ('sections_index_document_metadata', 'sections_sort_on_document_metadata')
    {
        if ($self->{$policy} !~ /\S/ && defined $collectcfg->{$policy})
        {
            $self->{$policy} = $collectcfg->{$policy};
        }

        if ($self->{$policy} !~ /^(never|always|unless_section_metadata_exists)$/)
        {
            $self->{$policy} = 'never';
        }
    }

    my ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},
                                                   $self->{'incremental'}, 'building',
                                                   $collectcfg);
    $self->{'removeold'} = $removeold;
    $self->{'keepold'} = $keepold;
    $self->{'incremental'} = $incremental;
    $self->{'incremental_mode'} = $incremental_mode;

    # New argument to track whether build is incremental
    if (!defined $self->{'incremental'})
    {
        $self->{'incremental'} = 0;
    }

    #set the text index
    if (($self->{'buildtype'} eq 'mgpp') || ($self->{'buildtype'} eq 'lucene') || ($self->{'buildtype'} eq 'solr'))
    {
        if ($self->{'textindex'} eq '')
        {
            $self->{'textindex'} = 'text';
        }
    }
    else
    {
        $self->{'textindex'} = 'section:text';
    }
}
450# set_collection_options()
451
# @function prepare_builders
#
# Works out which builder classes are needed, loads them, constructs one
# object per class and runs the initialisation phase on each.
#
# @param $config_filename Accepted but not referenced in this sub.
# @param $collectcfg      Accepted but not referenced in this sub.
#
# @return a reference to the array of builder objects: the main builder first
#         (omitted when the mode is "extra"), followed by one builder per
#         entry in 'orthogonalbuildtypes'.
#
# Sets 'realarchivedir' and 'realbuilddir' on the object. When a cache
# directory is configured these point inside it and the archives directory is
# synchronised into the cache first.
#
# Dies when a builder module cannot be loaded or a constructor fails.
#
sub prepare_builders
{
    my $self = shift @_;
    my ($config_filename,$collectcfg) = @_;

    my $archivedir = $self->{'archivedir'};
    my $builddir = $self->{'builddir'};
    my $buildtype = $self->{'buildtype'};
    my $cachedir = $self->{'cachedir'};
    my $collection = $self->{'collection'};
    my $debug = $self->{'debug'};
    my $faillog = $self->{'faillog'};
    my $gli = $self->{'gli'};
    my $incremental = $self->{'incremental'};
    my $incremental_mode = $self->{'incremental_mode'};
    my $keepold = $self->{'keepold'};
    my $maxdocs = $self->{'maxdocs'};
    my $maxnumeric = $self->{'maxnumeric'};
    my $no_strip_html = $self->{'no_strip_html'};
    my $no_text = $self->{'no_text'};
    my $orthogonalbuildtypes = $self->{'orthogonalbuildtypes'};
    my $out = $self->{'out'};
    my $remove_empty_classifications = $self->{'remove_empty_classifications'};
    my $sections_index_document_metadata = $self->{'sections_index_document_metadata'};
    my $sections_sort_on_document_metadata = $self->{'sections_sort_on_document_metadata'};
    my $site = $self->{'site'};
    my $store_metadata_coverage = $self->{'store_metadata_coverage'};
    my $verbosity = $self->{'verbosity'};

    if ($gli)
    {
        print STDERR "<Build>\n";
    }

    my ($realarchivedir, $realbuilddir);
    # update the archive cache if needed
    if ($cachedir)
    {
        if ($verbosity >= 1)
        {
            &gsprintf::gsprintf($out, "{buildcol.updating_archive_cache}\n");
        }

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: the collection name is matched literally, so characters
        # such as '.' or '+' in it are not treated as regex syntax
        if ($cachedir !~ /collect[\/\\]\Q$collection\E/)
        {
            $cachedir = &FileUtils::filenameConcatenate($cachedir, 'collect', $collection);
        }

        $realarchivedir = &FileUtils::filenameConcatenate($cachedir, 'archives');
        $realbuilddir = &FileUtils::filenameConcatenate($cachedir, 'building');
        &FileUtils::makeAllDirectories($realarchivedir);
        &FileUtils::makeAllDirectories($realbuilddir);
        &FileUtils::synchronizeDirectory($archivedir, $realarchivedir, $verbosity);
    }
    else
    {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }
    $self->{'realarchivedir'} = $realarchivedir;
    $self->{'realbuilddir'} = $realbuilddir;

    # build it in realbuilddir
    &FileUtils::makeAllDirectories($realbuilddir);

    my ($buildertype, $builderdir);
    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildertype = "custombuilder";
    }
    elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "custombuilder";
    }
    elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm")
    {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = $collection . 'builder';
    }
    else
    {
        $builderdir = undef;
        if ($buildtype ne '')
        {
            # caters for extension-based build types, such as 'solr'
            $buildertype = $buildtype . 'builder';
        }
        else
        {
            # Default to mgpp
            $buildertype = 'mgppbuilder';
        }
    }
    # check for extension specific builders
    # (that will then be run after main builder.pm)
    my @builderdir_list = ($builderdir);
    my @buildertype_list = ($buildertype);

    my $mode = $self->{'mode'};

    if ($mode eq "extra")
    {
        # knock out the main builder type, by resetting the lists to be empty
        @builderdir_list = ();
        @buildertype_list = ();
    }

    if (defined $orthogonalbuildtypes)
    {
        foreach my $obt (@$orthogonalbuildtypes)
        {
            push(@builderdir_list,undef); # rely on @INC to find it
            push(@buildertype_list,$obt."Builder");
        }
    }

    # Set up array of the main builder.pm, followed by any ones
    # from the extension folders

    my $num_builders = scalar(@buildertype_list);
    my @builders = ();

    for (my $i=0; $i<$num_builders; $i++)
    {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builderdir = $builderdir_list[$i];

        if ((defined $this_builderdir) && ($this_builderdir ne ""))
        {
            require "$this_builderdir/$this_buildertype.pm";
        }
        else
        {
            require "$this_buildertype.pm";
        }

        # Explicit class-method call. This package defines its own new(), so
        # the indirect-object form "new Class(...)" would be ambiguous here.
        # The eval block keeps the original behaviour of re-throwing the
        # stringified constructor error.
        my $this_builder = eval
        {
            $this_buildertype->new($site, $collection,
                                   $realarchivedir, $realbuilddir, $verbosity,
                                   $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                                   $remove_empty_classifications,
                                   $out, $no_text, $faillog, $gli);
        };
        die "$@" if $@;

        push(@builders,$this_builder);
    }

    # Init phase for builders (run only after every builder has been constructed)
    for (my $i=0; $i<$num_builders; $i++)
    {
        my $this_buildertype = $buildertype_list[$i];
        my $this_builder = $builders[$i];

        $this_builder->init();
        $this_builder->set_maxnumeric($maxnumeric);

        if (($this_buildertype eq "mgppbuilder") && $no_strip_html)
        {
            $this_builder->set_strip_html(0);
        }

        if ($sections_index_document_metadata ne "never")
        {
            $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
        }
        if (($this_buildertype eq "lucenebuilder" || $this_buildertype eq "solrbuilder") && $sections_sort_on_document_metadata ne "never")
        {
            $this_builder->set_sections_sort_on_document_metadata($sections_sort_on_document_metadata);
        }

        if ($store_metadata_coverage)
        {
            $this_builder->set_store_metadata_coverage(1);
        }
    }
    return \@builders;
}
640
# @function build_collection
#
# Runs the build passes selected by the object's 'mode' on every builder.
# Each pass is applied to all builders before the next pass starts.
#
# @param $builders_ref Reference to the array of builder objects.
#
# Modes (matched case-insensitively):
#   all, extra    - compress_text, build_indexes, make_infodatabase and
#                   collect_specific, in that order
#   compress_text - compress_text only
#   build_index   - build_indexes only
#   infodb        - make_infodatabase only
# Any other mode prints the "unknown mode" message and dies.
#
sub build_collection
{
    my ($self, $builders_ref) = @_;
    my @builders = @{$builders_ref};

    my $indexlevel = $self->{'indexlevel'};
    my $indexname = $self->{'indexname'};
    my $mode = $self->{'mode'};
    my $textindex = $self->{'textindex'};

    # One entry per pass, keyed by the mode name that selects it
    my %pass_for = (
        'compress_text'    => sub { $_[0]->compress_text($textindex); },
        # - we pass the required indexname and indexlevel (if specified) to
        #   the processor [jmt12]
        'build_index'      => sub { $_[0]->build_indexes($indexname, $indexlevel); },
        'infodb'           => sub { $_[0]->make_infodatabase(); },
        'collect_specific' => sub { $_[0]->collect_specific(); },
    );

    # Work out which passes were requested
    my @requested_passes = ();
    if ($mode =~ /^(all|extra)$/i)
    {
        @requested_passes = ('compress_text', 'build_index', 'infodb', 'collect_specific');
    }
    elsif ($mode =~ /^(compress_text|build_index|infodb)$/i)
    {
        @requested_passes = (lc($1));
    }
    else
    {
        (&gsprintf::gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode) && die);
    }

    foreach my $pass_name (@requested_passes)
    {
        my $run_pass = $pass_for{$pass_name};
        foreach my $builder (@builders)
        {
            # give each call its own $_ so that a builder method which
            # assigns to $_ cannot affect anything outside this loop
            local $_ = $builder;
            $run_pass->($builder);
        }
    }
}
686# build_collection()
687
# @function build_auxiliary_files
#
# Calls make_auxiliary_files() on every builder, unless the object's 'debug'
# flag is set, in which case nothing is done.
#
# @param $builders_ref Reference to the array of builder objects.
#
sub build_auxiliary_files
{
    my ($self, $builders_ref) = @_;
    my @builders = @{$builders_ref};

    unless ($self->{'debug'})
    {
        foreach my $builder (@builders)
        {
            # give each call its own $_ so that a builder method which
            # assigns to $_ cannot affect anything outside this loop
            local $_ = $builder;
            $builder->make_auxiliary_files();
        }
    }
}
699# build_auxiliary_files()
700
# @function complete_builders
#
# Finishes the build: calls deinit() on every builder, copies a build made in
# a separate (cached) directory back to the build directory, and copies
# rss-items.rdf from the archives directory into the build directory when
# that file exists.
#
# @param $builders_ref Reference to the array of builder objects.
#
# Reads 'realbuilddir', 'builddir', 'archivedir', 'debug', 'verbosity',
# 'out', 'gli', 'collectdir', 'collection' and 'site' from the object.
#
sub complete_builders
{
    my $self = shift(@_);
    my @builders = @{shift(@_)};

    foreach my $builder (@builders)
    {
        # give each call its own $_ so that a builder method which
        # assigns to $_ cannot affect anything outside this loop
        local $_ = $builder;
        $builder->deinit();
    }

    # the build was made somewhere other than builddir: replace builddir with
    # the freshly built copy (skipped in debug mode)
    if (($self->{'realbuilddir'} ne $self->{'builddir'}) && !$self->{'debug'})
    {
        if ($self->{'verbosity'} >= 1)
        {
            # the output handle is stored on the object; there is no $out
            # variable in scope in this sub
            &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_back_cached_build}\n");
        }
        &FileUtils::removeFilesRecursive($self->{'builddir'});
        &FileUtils::copyFilesRecursive($self->{'realbuilddir'}, $self->{'builddir'});
    }

    # for RSS support: Need rss-items.rdf file in index folder
    # check if a file called rss-items.rdf exists in archives, then copy it into the building folder
    # so that when building is moved to index, this file will then also be in index as desired
    # NOTE(review): $collection_dir is not used below; the call is kept in
    # case resolve_collection_dir() has side effects - confirm and remove
    my $collection_dir = &util::resolve_collection_dir($self->{'collectdir'},
                                                       $self->{'collection'},
                                                       $self->{'site'});
    my $rss_items_rdf_file = &FileUtils::filenameConcatenate($self->{'archivedir'}, 'rss-items.rdf');
    # @todo FileUtils
    if (defined $self->{'builddir'} && &FileUtils::directoryExists($self->{'builddir'}) && &FileUtils::fileExists($rss_items_rdf_file))
    {
        if ($self->{'verbosity'} >= 1)
        {
            # only the last path component of each directory is shown
            my $archivedir_tail = "'".basename($self->{'archivedir'})."'";
            my $builddir_tail = "'".basename($self->{'builddir'})."'";

            &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_rss_items_rdf}\n", $archivedir_tail, $builddir_tail);
        }
        &FileUtils::copyFiles($rss_items_rdf_file, $self->{'builddir'});
    }

    if ($self->{'gli'})
    {
        print STDERR "</Build>\n";
    }
}
745# complete_builders()
746
# @function activate_collection
#
# When the object's 'activate' flag is set, runs activate.pl on the
# collection once building is complete. Does nothing otherwise.
#
# The command line passes on -collectdir, -builddir, -site and -verbosity
# (each only when its value is true), then -removeold, -keepold and
# -incremental (each only when set), followed by the collection name. Every
# argument is wrapped in double quotes.
#
# On a non-zero status the failed command is reported on STDERR and the
# process exits with -1.
#
sub activate_collection
{
    my ($self) = @_;

    # if buildcol.pl was run with -activate, need to run activate.pl
    # now that building's complete
    if ($self->{'activate'})
    {
        my @activate_argv = ();

        # options that carry a value
        foreach my $valued_option ('collectdir', 'builddir', 'site', 'verbosity')
        {
            my $option_value = $self->{$valued_option};
            if ($option_value)
            {
                push(@activate_argv, '-' . $valued_option, $option_value);
            }
        }
        # plain on/off flags
        foreach my $flag_option ('removeold', 'keepold', 'incremental')
        {
            if ($self->{$flag_option})
            {
                push(@activate_argv, '-' . $flag_option);
            }
        }

        my $quoted_argv = join(' ', map { '"' . $_ . '"' } @activate_argv);
        my $perl_exec = &util::get_perl_exec();
        my $collection_name = $self->get_collection();
        my $activatecol_cmd = "\"$perl_exec\" -S activate.pl $quoted_argv \"$collection_name\"";

        my $activatecol_status = system($activatecol_cmd)/256;
        unless ($activatecol_status == 0)
        {
            print STDERR "Error: Failed to run: $activatecol_cmd\n";
            if ($! ne '')
            {
                print STDERR " $!\n";
            }
            exit(-1);
        }
    }
}
777
# @function deinit()
#
# Closes the OUT and FAILLOG file handles, each only when the corresponding
# 'close_out' / 'close_faillog' flag on the object is set.
#
sub deinit
{
    my ($self) = @_;

    close OUT if ($self->{'close_out'});
    close FAILLOG if ($self->{'close_faillog'});
}
793# deinit()
794
7951;
Note: See TracBrowser for help on using the repository browser.