root/main/trunk/greenstone2/perllib/buildcolutils.pm @ 31753

Revision 31753, 27.7 KB (checked in by ak19, 18 months ago)

Two fixes Kathy requested: 1. when running buildcol, ONLY deactivate a collection surrounding the lock-sensitive make_infodatabase() calls IF incremental. 2. Allow buildcol.pl to accept activate parameters like library_url (library_name and skipactivation). full-(re)build and incremental-(re)build scripts already accept additional parameters such as with -activate:skipactivation, but buildcol.pl doesn't work that way.

  • Property svn:executable set to *
Line 
1##############################################################################
2#
3# buildcolutils.pm -- index and build the collection. The buildtime counterpart
4#                    of inexport.pl
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package buildcolutils;
29
30#use strict;
31#no strict 'refs';
32
33use File::Basename;
34
35use colcfg;
36use dbutil;
37use util;
38use FileUtils;
39use scriptutil;
40use servercontrol;
41use gsprintf;
42use printusage;
43use parse2;
44
## @method new()
#
#  Parses and validates the arguments to the build process and returns a
#  buildcolutils object holding the parsed option values.
#
#  @note Added true incremental support - John Thompson, DL Consulting Ltd.
#  @note There were several bugs regarding using directories other than
#        "import" or "archives" during import and build quashed. - John
#        Thompson, DL Consulting Ltd.
#
#  @param  $argv     Reference to the argument list. On success the collection
#                    name is shifted off the front of this list.
#  @param  $options  Option description hash; its 'args' entry is handed to
#                    parse2::parse() and the whole hash to the usage printers.
#  @param  $opt_listall_options  Accepted for interface compatibility; not
#                    used by this constructor.
#
#  @return The blessed object. When -xml was given, the usage is printed as
#          XML and the object is returned early, without a collection name
#          and without the output handle having been set up.
#
#  Dies (after printing usage) if parsing fails, if -h was requested, if the
#  number of leftover arguments is not exactly one, or if the requested
#  output file cannot be opened.
#
sub new
{
  my $class = shift(@_);
  my ($argv, $options, $opt_listall_options) = @_;

  my $self = {'builddir' => undef,
              'buildtype' => undef,
              'close_faillog' => 0,
              'close_out' => 0,
              'mode' => '',
              'orthogonalbuildtypes' => undef,
              'realbuilddir' => undef,
              'textindex' => '',
              'xml' => 0
             };

  # general options available to all plugins; parsed values are stored
  # directly into $self
  my $arguments = $options->{'args'};
  my $intArgLeftinAfterParsing = &parse2::parse($argv, $arguments, $self, "allow_extra_options");
  # If parse returns -1 then something has gone wrong
  if ($intArgLeftinAfterParsing == -1)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}",1);
    print STDERR "Something went wrong during parsing the arguments. Scroll up for details.\n";
    die "\n";
  }

  # If a language has been specified, load the appropriate resource bundle
  # (Otherwise, the default resource bundle will be loaded automatically)
  if ($self->{'language'} && $self->{'language'} =~ /\S/)
  {
    &gsprintf::load_language_specific_resource_bundle($self->{'language'});
  }

  # Do we need 'listall' support in buildcol? If so, copy code from inexport
  # later [jmt12]

  # -xml only asks for a machine readable description of the options, so
  # print it and stop before any collection-specific checks are made
  if ($self->{'xml'})
  {
    &PrintUsage::print_xml_usage($options);
    print "\n";
    return bless($self, $class);
  }

  # the gli wants strings to be in UTF-8
  # (the parsed flag lives in $self; there is no $gli variable in this scope)
  if ($self->{'gli'})
  {
    &gsprintf::output_strings_in_UTF8;
  }

  # If the user specified -h, then we output the usage
  if (@$argv && $argv->[0] =~ /^\-+h/)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}");
    die "\n";
  }

  # now check that we had exactly one leftover arg, which should be
  # the collection name. We don't want to do this earlier, cos
  # -xml arg doesn't need a collection name
  if ($intArgLeftinAfterParsing != 1)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    print STDERR "There should be one argument left after parsing the script args: the collection name.\n";
    die "\n";
  }

  # Anything other than STDERR/STDOUT is treated as a file name to write to
  my $out = $self->{'out'};
  if ($out !~ /^(STDERR|STDOUT)$/i)
  {
    # three-argument open so the file name can never be read as a mode
    unless (open(OUT, '>', $out))
    {
      &gsprintf::gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
      # die regardless of what gsprintf returned
      die;
    }
    $out = "buildcolutils::OUT";
    $self->{'close_out'} = 1;
  }
  $out->autoflush(1);
  $self->{'out'} = $out;

  # @ARGV should be only one item, the name of the collection
  $self->{'collection'} = shift(@{$argv});

  return bless($self, $class);
}
# new()
147
148# newCGI()?
149
# @function get_collection
#
# Accessor: returns the value stored under this object's 'collection' key.
#
sub get_collection
{
  my ($self) = @_;
  my $collection_name = $self->{'collection'};
  return $collection_name;
}
# get_collection()
158
# @function read_collection_cfg
#
# Selects the collection, works out the Greenstone version, opens the fail
# log for appending and reads the collection configuration file.
#
# @param  $collection  Collection name, passed to colcfg::use_collection().
# @param  $options     Accepted for interface compatibility; not used here.
#
# @return ($config_filename, $collect_cfg) as returned by
#         colcfg::get_collect_cfg_name() and colcfg::read_collection_cfg().
#
# Side effects on $self: sets 'gs_version', 'faillog' (name of the opened
# handle), 'faillogname' (path of the log file) and 'close_faillog'.
# Dies if the collection cannot be selected or the fail log cannot be opened.
#
sub read_collection_cfg
{
  my $self = shift(@_);
  my ($collection, $options) = @_;

  my $collectdir = $self->{'collectdir'};
  my $site       = $self->{'site'};
  my $out        = $self->{'out'};

  # get and check the collection
  if (($collection = &colcfg::use_collection($site, $collection, $collectdir)) eq "")
  {
    #&PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    die "\n";
  }

  # set gs_version 2/3: a non-empty site means Greenstone 3
  $self->{'gs_version'} = "2";
  if ((defined $site) && ($site ne ""))
  {
    # gs3
    $self->{'gs_version'} = "3";
  }

  # add collection's perllib dir into include path in case we have collection
  # specific modules
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib'));
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'classify'));
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'plugins'));

  # check that we can open the faillog
  my $faillog = $self->{'faillog'};
  if ($faillog eq "")
  {
    $faillog = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
  }
  # note that we're appending to the faillog here (import.pl clears it each time)
  # this could potentially create a situation where the faillog keeps being added
  # to over multiple builds (if the import process is being skipped)
  unless (open(FAILLOG, '>>', $faillog))
  {
    &gsprintf::gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog);
    # die regardless of what gsprintf returned
    die;
  }
  # Remember the file name before switching over to the handle name, so that
  # 'faillogname' really is the path of the log and not the handle
  $self->{'faillogname'} = $faillog;
  my $faillog_handle = 'buildcolutils::FAILLOG';
  $faillog_handle->autoflush(1);
  $self->{'faillog'} = $faillog_handle;
  $self->{'close_faillog'} = 1;

  # Read in the collection configuration file.
  my $gs_mode = "gs".$self->{'gs_version'}; #gs2 or gs3
  my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
  my $collect_cfg = &colcfg::read_collection_cfg($config_filename, $gs_mode);

  return ($config_filename, $collect_cfg);
}
# read_collection_cfg()
215
# @function set_collection_options
# This function copies across values for arguments from the collection
# configuration file if they are not already provided by the user, then
# sets reasonable defaults for any required arguments that remains without
# a value.
#
# @param $collectcfg  Hash reference holding the collection configuration.
#                     Its 'infodbtype' entry is filled in with the default
#                     when missing; it is otherwise only read.
#
# All results are stored in $self.
sub set_collection_options
{
  my $self = shift @_;
  my ($collectcfg) = @_;

  # If the infodbtype value wasn't defined in the collect.cfg file, use the default
  if (!defined($collectcfg->{'infodbtype'}))
  {
    $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
  }
  # - just so I don't have to pass collectcfg around as well
  $self->{'infodbtype'} = $collectcfg->{'infodbtype'};

  if ($self->{'verbosity'} !~ /\d+/)
  {
    if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/)
    {
      $self->{'verbosity'} = $collectcfg->{'verbosity'};
    }
    else
    {
      $self->{'verbosity'} = 2; # the default
    }
  }

  # we use searchtype for determining buildtype, but for old versions, use buildtype
  if (defined $collectcfg->{'buildtype'})
  {
    $self->{'buildtype'} = $collectcfg->{'buildtype'};
  }
  elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'})
  {
    $self->{'buildtype'} = "mgpp";
  }
  else
  {
    $self->{'buildtype'} = "mg"; #mg is the default
  }

  if ($self->{'buildtype'} eq "mgpp" && defined $collectcfg->{'textcompress'})
  {
    $self->{'textindex'} = $collectcfg->{'textcompress'};
  }

  # is it okay to always clobber or possible remain undefined? [jmt12]
  if (defined $collectcfg->{'orthogonalbuildtypes'})
  {
    $self->{'orthogonalbuildtypes'} = $collectcfg->{'orthogonalbuildtypes'};
  }

  # - resolve (and possibly set to default) archivedir
  if (defined $collectcfg->{'archivedir'} && $self->{'archivedir'} eq "")
  {
    $self->{'archivedir'} = $collectcfg->{'archivedir'};
  }
  # Modified so that the archivedir, if provided as an argument, is made
  # absolute if it isn't already
  if ($self->{'archivedir'} eq "")
  {
    $self->{'archivedir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "archives");
  }
  else
  {
    $self->{'archivedir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'archivedir'});
  }
  # End Mod
  $self->{'archivedir'} = &FileUtils::sanitizePath($self->{'archivedir'});

  # - resolve (and possibly set to default) builddir
  if (defined $collectcfg->{'builddir'} && $self->{'builddir'} eq "")
  {
    $self->{'builddir'} = $collectcfg->{'builddir'};
  }
  if ($self->{'builddir'} eq "")
  {
    $self->{'builddir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'building');
    # Use the values held in $self: the lexical $incremental is only declared
    # further down and no $out variable exists in this sub, so testing those
    # names here could never print the message
    if ($self->{'incremental'})
    {
      &gsprintf::gsprintf($self->{'out'}, "{buildcol.incremental_default_builddir}\n");
    }
  }
  else
  {
    # make absolute if not already
    $self->{'builddir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'builddir'});
  }

  $self->{'builddir'} = &FileUtils::sanitizePath($self->{'builddir'});

  if (defined $collectcfg->{'cachedir'} && $self->{'cachedir'} eq "")
  {
    $self->{'cachedir'} = $collectcfg->{'cachedir'};
  }

  if ($self->{'maxdocs'} !~ /\-?\d+/)
  {
    if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/)
    {
      $self->{'maxdocs'} = $collectcfg->{'maxdocs'};
    }
    else
    {
      $self->{'maxdocs'} = -1; # the default
    }
  }

  # always clobbers? [jmt12]
  if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/)
  {
    $self->{'maxnumeric'} = $collectcfg->{'maxnumeric'};
  }
  # values outside 4..512 fall back to 4
  if ($self->{'maxnumeric'} < 4 || $self->{'maxnumeric'} > 512)
  {
    $self->{'maxnumeric'} = 4;
  }

  if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i)
  {
    $self->{'debug'} = 1;
  }

  if ($self->{'mode'} !~ /^(all|compress_text|build_index|infodb|extra)$/)
  {
    if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb|extra)$/)
    {
      $self->{'mode'} = $collectcfg->{'mode'};
    }
    else
    {
      $self->{'mode'} = "all"; # the default
    }
  }

  # Presumably 'index' from the collect.cfg still works [jmt12]
  if (defined $collectcfg->{'index'} && $self->{'indexname'} eq "")
  {
    $self->{'indexname'} = $collectcfg->{'index'};
  }
  # - 'index' from the command line doesn't make it through parsing so I
  # renamed this option 'indexname' [jmt12]
  if (defined $collectcfg->{'indexname'} && $self->{'indexname'} eq "")
  {
    $self->{'indexname'} = $collectcfg->{'indexname'};
  }
  # - we may also define the index level to build now [jmt12]
  if (defined $collectcfg->{'indexlevel'} && $self->{'indexlevel'} eq "")
  {
    $self->{'indexlevel'} = $collectcfg->{'indexlevel'};
  }

  # Boolean switches: a 'true' in the configuration turns the switch on
  # unless it has already been turned on
  foreach my $switch ('no_text', 'no_strip_html', 'store_metadata_coverage', 'remove_empty_classifications')
  {
    if (defined $collectcfg->{$switch} && $self->{$switch} == 0)
    {
      if ($collectcfg->{$switch} =~ /^true$/i)
      {
        $self->{$switch} = 1;
      }
    }
  }

  if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i)
  {
    $self->{'gli'} = 1;
  }
  if (!defined $self->{'gli'})
  {
    $self->{'gli'} = 0;
  }

  # Section metadata policies: take the configured value when none was given,
  # then force anything unrecognised to 'never'
  foreach my $policy ('sections_index_document_metadata', 'sections_sort_on_document_metadata')
  {
    if ($self->{$policy} !~ /\S/ && defined $collectcfg->{$policy})
    {
      $self->{$policy} = $collectcfg->{$policy};
    }
    if ($self->{$policy} !~ /^(never|always|unless_section_metadata_exists)$/)
    {
      $self->{$policy} = 'never';
    }
  }

  my ($removeold, $keepold, $incremental, $incremental_mode)
      = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},
                                                 $self->{'incremental'}, 'building',
                                                 $collectcfg);
  $self->{'removeold'}        = $removeold;
  $self->{'keepold'}          = $keepold;
  $self->{'incremental'}      = $incremental;
  $self->{'incremental_mode'} = $incremental_mode;

  # New argument to track whether build is incremental
  if (!defined $self->{'incremental'})
  {
    $self->{'incremental'} = 0;
  }

  #set the text index
  if (($self->{'buildtype'} eq 'mgpp') || ($self->{'buildtype'} eq 'lucene') || ($self->{'buildtype'} eq 'solr'))
  {
    if ($self->{'textindex'} eq '')
    {
      $self->{'textindex'} = 'text';
    }
  }
  else
  {
    $self->{'textindex'} = 'section:text';
  }
}
# set_collection_options()
462
# @function prepare_builders
#
# Works out the real archive/build directories (taking the optional cache
# directory into account), decides which builder classes to use, then
# constructs and initialises one builder object per class.
#
# @param  $config_filename  Accepted for interface compatibility; not used.
# @param  $collectcfg       Accepted for interface compatibility; not used.
#
# @return Reference to the array of initialised builder objects, main builder
#         first (unless the mode is 'extra'), followed by one per entry of
#         'orthogonalbuildtypes'.
#
# Side effects: sets 'realarchivedir' and 'realbuilddir' on $self, creates
# the build directory, and prints "<Build>" to STDERR when running for GLI.
#
sub prepare_builders
{
  my $self = shift @_;
  my ($config_filename,$collectcfg) = @_;

  my $archivedir  = $self->{'archivedir'};
  my $builddir    = $self->{'builddir'};
  my $buildtype   = $self->{'buildtype'};
  my $cachedir    = $self->{'cachedir'};
  my $collection  = $self->{'collection'};
  my $debug       = $self->{'debug'};
  my $faillog     = $self->{'faillog'};
  my $gli         = $self->{'gli'};
  my $incremental = $self->{'incremental'};
  my $incremental_mode = $self->{'incremental_mode'};
  my $keepold     = $self->{'keepold'};
  my $maxdocs     = $self->{'maxdocs'};
  my $maxnumeric  = $self->{'maxnumeric'};
  my $no_strip_html = $self->{'no_strip_html'};
  my $no_text     = $self->{'no_text'};
  my $orthogonalbuildtypes = $self->{'orthogonalbuildtypes'};
  my $out         = $self->{'out'};
  my $remove_empty_classifications = $self->{'remove_empty_classifications'};
  my $sections_index_document_metadata = $self->{'sections_index_document_metadata'};
  my $sections_sort_on_document_metadata = $self->{'sections_sort_on_document_metadata'};
  my $site        = $self->{'site'};
  my $store_metadata_coverage = $self->{'store_metadata_coverage'};
  my $verbosity   = $self->{'verbosity'};

  if ($gli)
  {
    print STDERR "<Build>\n";
  }

  my ($realarchivedir, $realbuilddir);
  # update the archive cache if needed
  if ($cachedir)
  {
    if ($verbosity >= 1)
    {
      &gsprintf::gsprintf($out, "{buildcol.updating_archive_cache}\n");
    }

    $cachedir =~ s/[\\\/]+$//;
    # \Q..\E: the collection name is literal text, not a pattern
    if ($cachedir !~ /collect[\/\\]\Q$collection\E/)
    {
      $cachedir = &FileUtils::filenameConcatenate($cachedir, 'collect', $collection);
    }

    $realarchivedir = &FileUtils::filenameConcatenate($cachedir, 'archives');
    $realbuilddir = &FileUtils::filenameConcatenate($cachedir, 'building');
    &FileUtils::makeAllDirectories($realarchivedir);
    &FileUtils::makeAllDirectories($realbuilddir);
    &FileUtils::synchronizeDirectory($archivedir, $realarchivedir, $verbosity);
  }
  else
  {
    $realarchivedir = $archivedir;
    $realbuilddir = $builddir;
  }
  $self->{'realarchivedir'} = $realarchivedir;
  $self->{'realbuilddir'} = $realbuilddir;

  # build it in realbuilddir
  &FileUtils::makeAllDirectories($realbuilddir);

  my ($buildertype, $builderdir);
  # if a builder class has been created for this collection, use it
  # otherwise, use the builder matching the build type
  if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
    $buildertype = "custombuilder";
  }
  elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    $buildertype = "custombuilder";
  }
  elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    $buildertype = $collection . 'builder';
  }
  else
  {
    $builderdir = undef;
    if ($buildtype ne '')
    {
      # caters for extension-based build types, such as 'solr'
      $buildertype = $buildtype . 'builder';
    }
    else
    {
      # Default to mgpp
      $buildertype = 'mgppbuilder';
    }
  }

  # check for extension specific builders
  # (that will then be run after main builder.pm)
  my @builderdir_list = ($builderdir);
  my @buildertype_list = ($buildertype);

  if ($self->{'mode'} eq "extra")
  {
    # knock out the main builder type, by resetting the lists to be empty
    @builderdir_list = ();
    @buildertype_list = ();
  }

  if (defined $orthogonalbuildtypes)
  {
    foreach my $obt (@$orthogonalbuildtypes)
    {
      push(@builderdir_list,undef); # rely on @INC to find it
      push(@buildertype_list,$obt."Builder");
    }
  }

  # Set up array of the main builder.pm, followed by any ones
  # from the extension folders
  my $num_builders = scalar(@buildertype_list);
  my @builders = ();

  # Construction phase: every builder is created before any is initialised
  for (my $i=0; $i<$num_builders; $i++)
  {
    my $this_buildertype = $buildertype_list[$i];
    my $this_builderdir  = $builderdir_list[$i];

    if ((defined $this_builderdir) && ($this_builderdir ne ""))
    {
      require "$this_builderdir/$this_buildertype.pm";
    }
    else
    {
      require "$this_buildertype.pm";
    }

    # A class name held in a scalar can be used as the invocant directly, so
    # no string eval is needed; a failing constructor still dies here
    my $this_builder = $this_buildertype->new($site, $collection,
                                              $realarchivedir, $realbuilddir, $verbosity,
                                              $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                                              $remove_empty_classifications,
                                              $out, $no_text, $faillog, $gli);

    push(@builders,$this_builder);
  }

  # Init phase for builders
  for (my $i=0; $i<$num_builders; $i++)
  {
    my $this_buildertype = $buildertype_list[$i];
    my $this_builder     = $builders[$i];

    $this_builder->init();
    $this_builder->set_maxnumeric($maxnumeric);

    if (($this_buildertype eq "mgppbuilder") && $no_strip_html)
    {
      $this_builder->set_strip_html(0);
    }

    if ($sections_index_document_metadata ne "never")
    {
      $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
    }
    if (($this_buildertype eq "lucenebuilder" || $this_buildertype eq "solrbuilder") && $sections_sort_on_document_metadata ne "never")
    {
      $this_builder->set_sections_sort_on_document_metadata($sections_sort_on_document_metadata);
    }

    if ($store_metadata_coverage)
    {
      $this_builder->set_store_metadata_coverage(1);
    }
  }
  return \@builders;
}
651
# @function build_collection
#
# Runs the build passes selected by the 'mode' option over every builder.
#
#   all / extra    compress_text, build_indexes, make_infodatabase and
#                  collect_specific, each pass run over all builders in turn
#   compress_text  only compress_text
#   build_index    only build_indexes
#   infodb         only make_infodatabase
#
# @param $builders_ref  Reference to the array of builder objects.
#
# Dies, after reporting the problem on STDERR, when the mode is not one of
# the above.
#
sub build_collection
{
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  my $indexlevel  = $self->{'indexlevel'};
  my $indexname   = $self->{'indexname'};
  my $mode        = $self->{'mode'};
  my $textindex   = $self->{'textindex'};

  # Each pass uses a named lexical loop variable, so a builder method that
  # clobbers $_ cannot alter the contents of @builders

  # Run the requested passes
  if ($mode =~ /^(all|extra)$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->compress_text($textindex);
    }
    # - we pass the required indexname and indexlevel (if specified) to the
    #   processor [jmt12]
    foreach my $builder (@builders)
    {
      $builder->build_indexes($indexname, $indexlevel);
    }

    _make_infodatabases($self, \@builders);

    foreach my $builder (@builders)
    {
      $builder->collect_specific();
    }
  }
  elsif ($mode =~ /^compress_text$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->compress_text($textindex);
    }
  }
  elsif ($mode =~ /^build_index$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->build_indexes($indexname, $indexlevel);
    }
  }
  elsif ($mode =~ /^infodb$/i)
  {
    # Shares the helper with the all/extra branch so that the server control
    # object exists in this mode as well
    _make_infodatabases($self, \@builders);
  }
  else
  {
    &gsprintf::gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
    # die regardless of what gsprintf returned
    die;
  }
}

# Private helper for build_collection(): runs make_infodatabase() on every
# builder that supports it, deactivating the collection beforehand when the
# build is incremental and the builder's infodb type does not support
# concurrent read and write (e.g. gdbm).
sub _make_infodatabases
{
  my ($self, $builders_ref) = @_;

  # All except the collection (1st parameter) can be empty. For GS3, also set the site parameter
  my $gsserver = servercontrol->new($self->get_collection(), $self->{'site'}, $self->{'verbosity'}, $self->{'builddir'}, $self->{'indexdir'}, $self->{'collectdir'}, $self->{'library_url'}, $self->{'library_name'});

  foreach my $builder (@{$builders_ref})
  {
    next unless ($builder->supports_make_infodatabase());

    my $infodbtype = $builder->{'infodbtype'};
    my $dbSupportsConcurrentRW = &dbutil::supportsConcurrentReadAndWrite($infodbtype);

    if (!$dbSupportsConcurrentRW && $self->{'incremental'})
    {
      $gsserver->print_task_msg("About to deactivate collection ".$self->get_collection());
      $gsserver->do_deactivate();
    }
    $builder->make_infodatabase();
  }
}
# build_collection()
736
# @function build_auxiliary_files
#
# Asks every builder to make its auxiliary files. Nothing is done when the
# 'debug' option is set.
#
# @param $builders_ref  Reference to the array of builder objects.
#
sub build_auxiliary_files
{
  my ($self, $builders_ref) = @_;

  # debug runs skip this step entirely
  return if ($self->{'debug'});

  foreach my $builder (@{$builders_ref})
  {
    $builder->make_auxiliary_files();
  }
}
# build_auxiliary_files()
749
# @function complete_builders
#
# Finishes off a build: de-initialises every builder, copies a cached build
# back into the build directory, and copies rss-items.rdf from the archives
# directory into the build directory when that file exists.
#
# @param $builders_ref  Reference to the array of builder objects.
#
# Prints "</Build>" to STDERR when running for GLI.
#
sub complete_builders
{
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  foreach my $builder (@builders)
  {
    $builder->deinit();
  }

  # The build happened somewhere other than builddir, so copy the result back
  # (skipped for debug runs)
  if (($self->{'realbuilddir'} ne $self->{'builddir'}) && !$self->{'debug'})
  {
    if ($self->{'verbosity'} >= 1)
    {
      # the output handle lives in $self; there is no $out variable in this sub
      &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_back_cached_build}\n");
    }
    &FileUtils::removeFilesRecursive($self->{'builddir'});
    &FileUtils::copyFilesRecursive($self->{'realbuilddir'}, $self->{'builddir'});
  }

  # for RSS support: Need rss-items.rdf file in index folder
  #  check if a file called rss-items.rdf exists in archives, then copy it into the building folder
  #  so that when building is moved to index, this file will then also be in index as desired
  # NOTE(review): the result of this call is never used below; the call is
  # kept in case resolve_collection_dir() has side effects - confirm and remove
  my $collection_dir = &util::resolve_collection_dir($self->{'collectdir'},
                                                     $self->{'collection'},
                                                     $self->{'site'});
  my $rss_items_rdf_file = &FileUtils::filenameConcatenate($self->{'archivedir'}, 'rss-items.rdf');
  # @todo FileUtils
  if (defined $self->{'builddir'} && &FileUtils::directoryExists($self->{'builddir'}) && &FileUtils::fileExists($rss_items_rdf_file))
  {
    if ($self->{'verbosity'} >= 1)
    {
      # only the last path component of each directory is shown in the message
      my $archivedir_tail = "'".basename($self->{'archivedir'})."'";
      my $builddir_tail =  "'".basename($self->{'builddir'})."'";

      &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_rss_items_rdf}\n", $archivedir_tail, $builddir_tail);
    }
    &FileUtils::copyFiles($rss_items_rdf_file, $self->{'builddir'});
  }

  if ($self->{'gli'})
  {
    print STDERR "</Build>\n";
  }
}
# complete_builders()
795
# @function activate_collection
#
# If buildcol.pl was run with -activate, runs activate.pl for this collection
# now that building is complete, forwarding the relevant options. Exits the
# process with -1 when activate.pl reports a non-zero status.
#
sub activate_collection
{
  my $self = shift(@_);

  # nothing to do unless activation was asked for
  return unless ($self->{'activate'});

  my @activate_argv = ();

  # options that are forwarded together with their value, when it is set
  foreach my $valued_option ('library_url', 'library_name', 'collectdir', 'builddir', 'indexdir', 'site', 'verbosity')
  {
    my $value = $self->{$valued_option};
    if ($value)
    {
      push(@activate_argv, '-' . $valued_option, $value);
    }
  }

  # plain switches
  foreach my $switch ('removeold', 'keepold', 'incremental')
  {
    if ($self->{$switch})
    {
      push(@activate_argv, '-' . $switch);
    }
  }

  if ($self->{'skipactivation'})
  {
    push(@activate_argv, '-skipactivation', $self->{'skipactivation'});
  }

  # every forwarded argument is wrapped in double quotes
  my @quoted_args = map { '"' . $_ . '"' } @activate_argv;
  my $quoted_argv = join(' ', @quoted_args);

  my $perl_exec = &util::get_perl_exec();
  my $activatecol_cmd = '"' . $perl_exec . '" -S activate.pl ' . $quoted_argv . ' "' . $self->get_collection() . '"';
  my $activatecol_status = system($activatecol_cmd)/256;

  if ($activatecol_status != 0)
  {
    print STDERR "Error: Failed to run: $activatecol_cmd\n";
    if ($! ne '')
    {
      print STDERR "       $!\n";
    }
    exit(-1);
  }
}
832
# @function deinit()
#
# Closes the OUT and FAILLOG file handles, each only when the matching
# 'close_out' / 'close_faillog' flag is set on this object.
#
sub deinit
{
  my ($self) = @_;

  close(OUT) if ($self->{'close_out'});
  close(FAILLOG) if ($self->{'close_faillog'});
}
# deinit()
849
8501;
Note: See TracBrowser for help on using the browser.