root/main/trunk/greenstone2/perllib/buildcolutils.pm @ 30518

Revision 30518, 27.4 KB (checked in by ak19, 4 years ago)

Related to previous commit. Need to similarly handle another case of make_infodatabase().

  • Property svn:executable set to *
Line 
1##############################################################################
2#
3# buildcolutils.pm -- index and build the collection. The buildtime counterpart
4#                    of inexport.pl
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package buildcolutils;
29
30#use strict;
31#no strict 'refs';
32
33use File::Basename;
34
35use colcfg;
36use dbutil;
37use util;
38use FileUtils;
39use scriptutil;
40use gsprintf;
41use printusage;
42use parse2;
43
44## @method new()
45#
46#  Parses up and validates the arguments to the build process before creating
47#  the appropriate build process to do the actual work
48#
49#  @note Added true incremental support - John Thompson, DL Consulting Ltd.
50#  @note There were several bugs regarding using directories other than
51#        "import" or "archives" during import and build quashed. - John
52#        Thompson, DL Consulting Ltd.
53#
54#  @param  $incremental If true indicates this build should not regenerate all
55#                       the index and metadata files, and should instead just
56#                       append the information found in the archives directory
57#                       to the existing files. If this requires some complex
58#                       work so as to correctly insert into a classifier so be
59#                       it. Of course none of this is done here - instead the
60#                       incremental argument is passed to the document
61#                       processor.
62#
# Constructor. Parses the command line arguments into a new object, handles
# the "usage only" cases (-xml, -h, bad arguments), opens the output stream
# and records the collection name.
#
#  @param  $argv    reference to the argument list; the collection name is
#                   shifted off it once the options have been parsed
#  @param  $options option description structure; its 'args' entry is handed
#                   to parse2::parse and the whole structure to PrintUsage
#  @param  $opt_listall_options accepted for interface compatibility, unused
#  @return the blessed object (when -xml is given the object is returned
#          straight after the XML usage has been printed)
#  Dies if argument parsing fails, if -h was given, if the number of leftover
#  arguments is not exactly one, or if the output file cannot be opened.
sub new
{
  my $class = shift(@_);
  my ($argv, $options, $opt_listall_options) = @_;

  my $self = {'builddir' => undef,
              'buildtype' => undef,
              'close_faillog' => 0,
              'close_out' => 0,
              'mode' => '',
              'orthogonalbuildtypes' => undef,
              'realbuilddir' => undef,
              'textindex' => '',
              'xml' => 0
             };

  # general options available to all plugins
  my $arguments = $options->{'args'};
  my $intArgLeftinAfterParsing = &parse2::parse($argv, $arguments, $self, "allow_extra_options");
  # If parse returns -1 then something has gone wrong
  if ($intArgLeftinAfterParsing == -1)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}",1);
    print STDERR "Something went wrong during parsing the arguments. Scroll up for details.\n";
    die "\n";
  }

  # If $language has been specified, load the appropriate resource bundle
  # (Otherwise, the default resource bundle will be loaded automatically)
  if ($self->{'language'} && $self->{'language'} =~ /\S/)
  {
    &gsprintf::load_language_specific_resource_bundle($self->{'language'});
  }

  # Do we need 'listall' support in buildcol? If so, copy code from inexport
  # later [jmt12]

  # -xml only asks for a machine readable description of the options, so
  # print it and return before any collection specific checks are made
  if ($self->{'xml'})
  {
    &PrintUsage::print_xml_usage($options);
    print "\n";
    return bless($self, $class);
  }

  # the gli wants strings to be in UTF-8
  # (read the parsed option from $self; there is no lexical $gli in scope)
  if ($self->{'gli'})
  {
    &gsprintf::output_strings_in_UTF8;
  }

  # If the user specified -h, then we output the usage
  if (@$argv && $argv->[0] =~ /^\-+h/)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}");
    die "\n";
  }

  # now check that we had exactly one leftover arg, which should be
  # the collection name. We don't want to do this earlier, cos
  # -xml arg doesn't need a collection name
  if ($intArgLeftinAfterParsing != 1)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    print STDERR "There should be one argument left after parsing the script args: the collection name.\n";
    die "\n";
  }

  my $out = $self->{'out'};
  if ($out =~ /^(STDERR|STDOUT)$/i)
  {
    # the match is case insensitive but the handle names are not, so
    # normalise before the name is used as a symbolic filehandle
    $out = uc($out);
  }
  else
  {
    # three argument open so that the file name can never be interpreted
    # as part of the open mode; die unconditionally on failure
    unless (open(OUT, '>', $out))
    {
      &gsprintf::gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
      die;
    }
    $out = "buildcolutils::OUT";
    $self->{'close_out'} = 1;
  }
  $out->autoflush(1);
  $self->{'out'} = $out;

  # @ARGV should be only one item, the name of the collection
  $self->{'collection'} = shift(@{$argv});

  return bless($self, $class);
}
145# new()
146
147# newCGI()?
148
149# @function get_collection
150#
# Accessor: returns the collection name stored under the 'collection' key
# of this object.
sub get_collection
{
  my ($this) = @_;
  my $collection_name = $this->{'collection'};
  return $collection_name;
}
156# get_collection()
157
158# @function read_collection_cfg
159#
# Selects the collection, opens the fail log for appending and reads the
# collection configuration file.
#
#  @param  $collection name of the collection to use
#  @param  $options    option description structure (currently unused here)
#  @return a two element list: the configuration file name and the parsed
#          collection configuration
#  Dies if colcfg::use_collection returns an empty name or if the fail log
#  cannot be opened.
#  Sets 'gs_version', 'faillog' (the handle name), 'faillogname' (the path
#  of the log file) and 'close_faillog' on the object.
sub read_collection_cfg
{
  my $self = shift(@_);
  my ($collection, $options) = @_;

  my $collectdir = $self->{'collectdir'};
  my $site       = $self->{'site'};
  my $out        = $self->{'out'};

  # get and check the collection
  $collection = &colcfg::use_collection($site, $collection, $collectdir);
  if ($collection eq "")
  {
    #&PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    die "\n";
  }

  # set gs_version 2/3: a non-empty site means gs3
  $self->{'gs_version'} = ((defined $site) && ($site ne "")) ? "3" : "2";

  # add collection's perllib dir into include path in case we have collection
  # specific modules
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib'));
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'classify'));
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'plugins'));

  # check that we can open the faillog
  my $faillogname = $self->{'faillog'};
  if (!defined $faillogname || $faillogname eq "")
  {
    $faillogname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
  }
  # note that we're appending to the faillog here (import.pl clears it each time)
  # this could potentially create a situation where the faillog keeps being added
  # to over multiple builds (if the import process is being skipped)
  unless (open(FAILLOG, '>>', $faillogname))
  {
    &gsprintf::gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillogname);
    die;
  }
  my $faillog = 'buildcolutils::FAILLOG';
  $faillog->autoflush(1);
  $self->{'faillog'} = $faillog;
  # keep the file path separately from the handle name
  $self->{'faillogname'} = $faillogname;
  $self->{'close_faillog'} = 1;

  # Read in the collection configuration file.
  my $gs_mode = "gs".$self->{'gs_version'}; #gs2 or gs3
  my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
  my $collect_cfg = &colcfg::read_collection_cfg($config_filename, $gs_mode);

  return ($config_filename, $collect_cfg);
}
213# read_collection_cfg()
214
215# @function set_collection_options
216# This function copies across values for arguments from the collection
217# configuration file if they are not already provided by the user, then
218# sets reasonable defaults for any required arguments that remain without
219# a value.
# Merges option values from the collection configuration into this object.
# For most options a value already supplied on the command line wins, then
# the value from the configuration, then a built-in default; the exceptions
# are noted inline.
#
#  @param  $collectcfg reference to the parsed collection configuration
#                      hash. Its 'infodbtype' entry is filled in with the
#                      default when it is not defined.
sub set_collection_options
{
  my $self = shift @_;
  my ($collectcfg) = @_;

  # If the infodbtype value wasn't defined in the collect.cfg file, use the default
  if (!defined($collectcfg->{'infodbtype'}))
  {
    $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
  }
  # - just so I don't have to pass collectcfg around as well
  $self->{'infodbtype'} = $collectcfg->{'infodbtype'};

  if ($self->{'verbosity'} !~ /\d+/)
  {
    if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/)
    {
      $self->{'verbosity'} = $collectcfg->{'verbosity'};
    }
    else
    {
      $self->{'verbosity'} = 2; # the default
    }
  }

  # we use searchtype for determining buildtype, but for old versions, use buildtype
  if (defined $collectcfg->{'buildtype'})
  {
    $self->{'buildtype'} = $collectcfg->{'buildtype'};
  }
  elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'})
  {
    $self->{'buildtype'} = "mgpp";
  }
  else
  {
    $self->{'buildtype'} = "mg"; #mg is the default
  }

  if ($self->{'buildtype'} eq "mgpp" && defined $collectcfg->{'textcompress'})
  {
    $self->{'textindex'} = $collectcfg->{'textcompress'};
  }

  # is it okay to always clobber or possible remain undefined? [jmt12]
  if (defined $collectcfg->{'orthogonalbuildtypes'})
  {
    $self->{'orthogonalbuildtypes'} = $collectcfg->{'orthogonalbuildtypes'};
  }

  # - resolve (and possibly set to default) archivedir
  if (defined $collectcfg->{'archivedir'} && $self->{'archivedir'} eq "")
  {
    $self->{'archivedir'} = $collectcfg->{'archivedir'};
  }
  # Modified so that the archivedir, if provided as an argument, is made
  # absolute if it isn't already
  if ($self->{'archivedir'} eq "")
  {
    $self->{'archivedir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "archives");
  }
  else
  {
    $self->{'archivedir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'archivedir'});
  }
  # End Mod
  $self->{'archivedir'} = &FileUtils::sanitizePath($self->{'archivedir'});

  # - resolve (and possibly set to default) builddir
  if (defined $collectcfg->{'builddir'} && $self->{'builddir'} eq "")
  {
    $self->{'builddir'} = $collectcfg->{'builddir'};
  }
  if ($self->{'builddir'} eq "")
  {
    $self->{'builddir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'building');
    # Use the values held on the object: no lexical $incremental or $out
    # exists at this point in the function.
    if ($self->{'incremental'})
    {
      &gsprintf::gsprintf($self->{'out'}, "{buildcol.incremental_default_builddir}\n");
    }
  }
  else
  {
    # make absolute if not already
    $self->{'builddir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'builddir'});
  }
  $self->{'builddir'} = &FileUtils::sanitizePath($self->{'builddir'});

  if (defined $collectcfg->{'cachedir'} && $self->{'cachedir'} eq "")
  {
    $self->{'cachedir'} = $collectcfg->{'cachedir'};
  }

  if ($self->{'maxdocs'} !~ /\-?\d+/)
  {
    if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/)
    {
      $self->{'maxdocs'} = $collectcfg->{'maxdocs'};
    }
    else
    {
      $self->{'maxdocs'} = -1; # the default
    }
  }

  # always clobbers? [jmt12]
  if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/)
  {
    $self->{'maxnumeric'} = $collectcfg->{'maxnumeric'};
  }
  # values outside 4..512 fall back to 4
  if ($self->{'maxnumeric'} < 4 || $self->{'maxnumeric'} > 512)
  {
    $self->{'maxnumeric'} = 4;
  }

  if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i)
  {
    $self->{'debug'} = 1;
  }

  my $valid_mode = qr/^(all|compress_text|build_index|infodb|extra)$/;
  if ($self->{'mode'} !~ $valid_mode)
  {
    if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ $valid_mode)
    {
      $self->{'mode'} = $collectcfg->{'mode'};
    }
    else
    {
      $self->{'mode'} = "all"; # the default
    }
  }

  # Presumably 'index' from the collect.cfg still works [jmt12]
  if (defined $collectcfg->{'index'} && $self->{'indexname'} eq "")
  {
    $self->{'indexname'} = $collectcfg->{'index'};
  }
  # - 'index' from the command line doesn't make it through parsing so I
  # renamed this option 'indexname' [jmt12]
  if (defined $collectcfg->{'indexname'} && $self->{'indexname'} eq "")
  {
    $self->{'indexname'} = $collectcfg->{'indexname'};
  }
  # - we may also define the index level to build now [jmt12]
  if (defined $collectcfg->{'indexlevel'} && $self->{'indexlevel'} eq "")
  {
    $self->{'indexlevel'} = $collectcfg->{'indexlevel'};
  }

  # Boolean switches: turned on by a "true" in the configuration, but only
  # when they are not already set on the object.
  foreach my $flag ('no_text', 'no_strip_html', 'store_metadata_coverage', 'remove_empty_classifications')
  {
    if (defined $collectcfg->{$flag} && $self->{$flag} == 0 && $collectcfg->{$flag} =~ /^true$/i)
    {
      $self->{$flag} = 1;
    }
  }

  if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i)
  {
    $self->{'gli'} = 1;
  }
  if (!defined $self->{'gli'})
  {
    $self->{'gli'} = 0;
  }

  # Both section/document metadata policies take the configured value when
  # none was given, and anything unrecognised becomes 'never'.
  foreach my $policy ('sections_index_document_metadata', 'sections_sort_on_document_metadata')
  {
    if ($self->{$policy} !~ /\S/ && defined $collectcfg->{$policy})
    {
      $self->{$policy} = $collectcfg->{$policy};
    }
    if ($self->{$policy} !~ /^(never|always|unless_section_metadata_exists)$/)
    {
      $self->{$policy} = 'never';
    }
  }

  my ($removeold, $keepold, $incremental, $incremental_mode)
      = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},
                                                 $self->{'incremental'}, 'building',
                                                 $collectcfg);
  $self->{'removeold'}        = $removeold;
  $self->{'keepold'}          = $keepold;
  $self->{'incremental'}      = $incremental;
  $self->{'incremental_mode'} = $incremental_mode;

  # New argument to track whether build is incremental
  if (!defined $self->{'incremental'})
  {
    $self->{'incremental'} = 0;
  }

  #set the text index
  if (($self->{'buildtype'} eq 'mgpp') || ($self->{'buildtype'} eq 'lucene') || ($self->{'buildtype'} eq 'solr'))
  {
    if ($self->{'textindex'} eq '')
    {
      $self->{'textindex'} = 'text';
    }
  }
  else
  {
    $self->{'textindex'} = 'section:text';
  }
}
460# set_collection_options()
461
462# @function prepare_builders
463#
# Works out where to build, which builder classes to use, then loads,
# constructs and initialises one builder object per class.
#
#  @param  $config_filename name of the collection configuration file
#                           (not used in this function)
#  @param  $collectcfg      parsed collection configuration (not used in
#                           this function)
#  @return reference to an array of initialised builder objects: the main
#          builder first (omitted when mode is "extra"), followed by one
#          per entry in 'orthogonalbuildtypes'
#  Sets 'realarchivedir' and 'realbuilddir' on the object. Dies if a builder
#  module cannot be loaded or its constructor fails.
sub prepare_builders
{
  my $self = shift @_;
  my ($config_filename,$collectcfg) = @_;

  my $archivedir  = $self->{'archivedir'};
  my $builddir    = $self->{'builddir'};
  my $buildtype   = $self->{'buildtype'};
  my $cachedir    = $self->{'cachedir'};
  my $collection  = $self->{'collection'};
  my $debug       = $self->{'debug'};
  my $faillog     = $self->{'faillog'};
  my $gli         = $self->{'gli'};
  my $incremental = $self->{'incremental'};
  my $incremental_mode = $self->{'incremental_mode'};
  my $keepold     = $self->{'keepold'};
  my $maxdocs     = $self->{'maxdocs'};
  my $maxnumeric  = $self->{'maxnumeric'};
  my $no_strip_html = $self->{'no_strip_html'};
  my $no_text     = $self->{'no_text'};
  my $orthogonalbuildtypes = $self->{'orthogonalbuildtypes'};
  my $out         = $self->{'out'};
  my $remove_empty_classifications = $self->{'remove_empty_classifications'};
  my $sections_index_document_metadata = $self->{'sections_index_document_metadata'};
  my $sections_sort_on_document_metadata = $self->{'sections_sort_on_document_metadata'};
  my $site        = $self->{'site'};
  my $store_metadata_coverage = $self->{'store_metadata_coverage'};
  my $verbosity   = $self->{'verbosity'};

  if ($gli)
  {
    print STDERR "<Build>\n";
  }

  my ($realarchivedir, $realbuilddir);
  # update the archive cache if needed
  if ($cachedir)
  {
    if ($verbosity >= 1)
    {
      &gsprintf::gsprintf($out, "{buildcol.updating_archive_cache}\n");
    }

    $cachedir =~ s/[\\\/]+$//;
    # \Q..\E: the collection name is data, not a pattern
    if ($cachedir !~ /collect[\/\\]\Q$collection\E/)
    {
      $cachedir = &FileUtils::filenameConcatenate($cachedir, 'collect', $collection);
    }

    $realarchivedir = &FileUtils::filenameConcatenate($cachedir, 'archives');
    $realbuilddir = &FileUtils::filenameConcatenate($cachedir, 'building');
    &FileUtils::makeAllDirectories($realarchivedir);
    &FileUtils::makeAllDirectories($realbuilddir);
    &FileUtils::synchronizeDirectory($archivedir, $realarchivedir, $verbosity);
  }
  else
  {
    $realarchivedir = $archivedir;
    $realbuilddir = $builddir;
  }
  $self->{'realarchivedir'} = $realarchivedir;
  $self->{'realbuilddir'} = $realbuilddir;

  # build it in realbuilddir
  &FileUtils::makeAllDirectories($realbuilddir);

  my ($buildertype, $builderdir);
  # if a builder class has been created for this collection, use it
  # otherwise, use the builder named after the build type
  if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
    $buildertype = "custombuilder";
  }
  elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    $buildertype = "custombuilder";
  }
  elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    $buildertype = $collection . 'builder';
  }
  else
  {
    $builderdir = undef;
    if ($buildtype ne '')
    {
      # caters for extension-based build types, such as 'solr'
      $buildertype = $buildtype . 'builder';
    }
    else
    {
      # Default to mgpp
      $buildertype = 'mgppbuilder';
    }
  }

  # Parallel lists: the main builder first, then any extension specific
  # builders (which are run after the main one)
  my @builderdir_list = ($builderdir);
  my @buildertype_list = ($buildertype);

  if ($self->{'mode'} eq "extra")
  {
    # knock out the main builder type, by resetting the lists to be empty
    @builderdir_list = ();
    @buildertype_list = ();
  }

  if (defined $orthogonalbuildtypes)
  {
    foreach my $obt (@$orthogonalbuildtypes)
    {
      push(@builderdir_list,undef); # rely on @INC to find it
      push(@buildertype_list,$obt."Builder");
    }
  }

  # Load and construct each builder
  my @builders = ();
  foreach my $i (0 .. $#buildertype_list)
  {
    my $this_buildertype = $buildertype_list[$i];
    my $this_builderdir  = $builderdir_list[$i];

    if ((defined $this_builderdir) && ($this_builderdir ne ""))
    {
      require "$this_builderdir/$this_buildertype.pm";
    }
    else
    {
      require "$this_buildertype.pm";
    }

    # Direct class method call on the class name held in the string. This
    # needs no string eval and cannot be misparsed as a call to this
    # package's own new(); any constructor failure propagates as before.
    my $this_builder = $this_buildertype->new($site, $collection,
                                              $realarchivedir, $realbuilddir, $verbosity,
                                              $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                                              $remove_empty_classifications,
                                              $out, $no_text, $faillog, $gli);

    push(@builders,$this_builder);
  }

  # Init phase for builders
  foreach my $i (0 .. $#builders)
  {
    my $this_buildertype = $buildertype_list[$i];
    my $this_builder     = $builders[$i];

    $this_builder->init();
    $this_builder->set_maxnumeric($maxnumeric);

    if (($this_buildertype eq "mgppbuilder") && $no_strip_html)
    {
      $this_builder->set_strip_html(0);
    }

    if ($sections_index_document_metadata ne "never")
    {
      $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
    }
    if (($this_buildertype eq "lucenebuilder" || $this_buildertype eq "solrbuilder") && $sections_sort_on_document_metadata ne "never")
    {
      $this_builder->set_sections_sort_on_document_metadata($sections_sort_on_document_metadata);
    }

    if ($store_metadata_coverage)
    {
      $this_builder->set_store_metadata_coverage(1);
    }
  }
  return \@builders;
}
650
# Runs the build passes selected by the 'mode' option over every builder.
#
#  @param  reference to the array of builder objects
#  Modes (matched case-insensitively):
#    all, extra    - compress_text, build_indexes, make_infodatabase and
#                    collect_specific, each pass run over all builders
#                    before the next pass starts
#    compress_text - compress_text only
#    build_index   - build_indexes only
#    infodb        - make_infodatabase only
#  Any other mode prints the {buildcol.unknown_mode} message and dies.
sub build_collection
{
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  my $indexlevel  = $self->{'indexlevel'};
  my $indexname   = $self->{'indexname'};
  my $mode        = $self->{'mode'};
  my $textindex   = $self->{'textindex'};

  # Shared by the 'all'/'extra' and 'infodb' modes.
  # when incrementally rebuilding a collection using any db that doesn't support concurrent
  # read and write (e.g. gdbm), need to deactivate the collection before make_infodatabase()
  my $make_infodatabases = sub
  {
    foreach my $builder (@builders)
    {
      next unless $builder->supports_make_infodatabase();

      my $infodbtype = $builder->{'infodbtype'};
      if (!&dbutil::supportsConcurrentReadAndWrite($infodbtype))
      {
        $self->activate_collection("just_deactivate");
      }
      $builder->make_infodatabase();
    }
  };

  # Run the requested passes
  if ($mode =~ /^(all|extra)$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->compress_text($textindex);
    }
    # - we pass the required indexname and indexlevel (if specified) to the
    #   processor [jmt12]
    foreach my $builder (@builders)
    {
      $builder->build_indexes($indexname, $indexlevel);
    }
    $make_infodatabases->();
    foreach my $builder (@builders)
    {
      $builder->collect_specific();
    }
  }
  elsif ($mode =~ /^compress_text$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->compress_text($textindex);
    }
  }
  elsif ($mode =~ /^build_index$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->build_indexes($indexname, $indexlevel);
    }
  }
  elsif ($mode =~ /^infodb$/i)
  {
    $make_infodatabases->();
  }
  else
  {
    # die regardless of what gsprintf returns
    &gsprintf::gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
    die;
  }
}
731# build_collection()
732
733# @function build_auxiliary_files
734#
# Asks every builder to create its auxiliary files, unless the 'debug'
# option is set, in which case nothing is done.
#
#  @param  reference to the array of builder objects
sub build_auxiliary_files
{
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  if (!$self->{'debug'})
  {
    # plain loop: the calls are made for their side effects only
    foreach my $builder (@builders)
    {
      $builder->make_auxiliary_files();
    }
  }
}
744# build_auxiliary_files()
745
746# @function complete_builders
747#
# Finishes the build: deinitialises every builder, copies a cached build
# back into the build directory and copies rss-items.rdf from the archives
# directory into the build directory when it exists.
#
#  @param  reference to the array of builder objects
sub complete_builders
{
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  foreach my $builder (@builders)
  {
    $builder->deinit();
  }

  # The build happened somewhere other than 'builddir' (see 'realbuilddir'),
  # so replace the contents of 'builddir' with what was built.
  if (($self->{'realbuilddir'} ne $self->{'builddir'}) && !$self->{'debug'})
  {
    if ($self->{'verbosity'} >= 1)
    {
      # write to the configured output handle; there is no lexical $out here
      &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_back_cached_build}\n");
    }
    &FileUtils::removeFilesRecursive($self->{'builddir'});
    &FileUtils::copyFilesRecursive($self->{'realbuilddir'}, $self->{'builddir'});
  }

  # for RSS support: Need rss-items.rdf file in index folder
  #  check if a file called rss-items.rdf exists in archives, then copy it into the building folder
  #  so that when building is moved to index, this file will then also be in index as desired
  # NOTE(review): $collection_dir is not used below; the call is kept in case
  # resolve_collection_dir has side effects - confirm and remove if not.
  my $collection_dir = &util::resolve_collection_dir($self->{'collectdir'},
                                                     $self->{'collection'},
                                                     $self->{'site'});
  my $rss_items_rdf_file = &FileUtils::filenameConcatenate($self->{'archivedir'}, 'rss-items.rdf');
  # @todo FileUtils
  if (defined $self->{'builddir'} && &FileUtils::directoryExists($self->{'builddir'}) && &FileUtils::fileExists($rss_items_rdf_file))
  {
    if ($self->{'verbosity'} >= 1)
    {
      # only the last path component of each directory is shown in the message
      my $archivedir_tail = "'".basename($self->{'archivedir'})."'";
      my $builddir_tail =  "'".basename($self->{'builddir'})."'";

      &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_rss_items_rdf}\n", $archivedir_tail, $builddir_tail);
    }
    &FileUtils::copyFiles($rss_items_rdf_file, $self->{'builddir'});
  }

  if ($self->{'gli'})
  {
    print STDERR "</Build>\n";
  }
}
790# complete_builders()
791
792# @function activate_collection
793#
# Runs activate.pl for this collection when the 'activate' option is set;
# otherwise does nothing.
#
#  @param  $activation_cmd optional; "just_deactivate", "just_activate" or
#                          "just_deactivate_and_activate" add the
#                          -justdeactivate / -justactivate switches to the
#                          activate.pl command line
#  Exits the process with -1 if the activate.pl command reports failure.
sub activate_collection
{
  my $self = shift(@_);
  my $activation_cmd = shift(@_);

  my $requested = (defined $activation_cmd) ? $activation_cmd : '';
  my $just_deactivate = ($requested eq "just_deactivate" || $requested eq "just_deactivate_and_activate");
  my $just_activate   = ($requested eq "just_activate"   || $requested eq "just_deactivate_and_activate");

  # if buildcol.pl was run with -activate, need to run activate.pl
  # now that building's complete
  if ($self->{'activate'})
  {
    my @activate_argv = ();

    # options that carry a value are only passed on when the value is true
    foreach my $valued ('collectdir', 'builddir', 'site', 'verbosity')
    {
      if ($self->{$valued})
      {
        push(@activate_argv, '-' . $valued, $self->{$valued});
      }
    }
    # simple on/off switches
    foreach my $switch ('removeold', 'keepold', 'incremental')
    {
      if ($self->{$switch})
      {
        push(@activate_argv, '-' . $switch);
      }
    }
    if ($just_activate || $self->{'justactivate'})
    {
      push(@activate_argv, '-justactivate');
    }
    if ($just_deactivate || $self->{'justdeactivate'})
    {
      push(@activate_argv, '-justdeactivate');
    }

    # every argument is wrapped in double quotes for the command string
    my @quoted = map { "\"$_\"" } @activate_argv;
    my $quoted_argv = join(' ', @quoted);
    my $activatecol_cmd = '"' . &util::get_perl_exec(). '" -S activate.pl ' . $quoted_argv . ' "' . $self->get_collection() . '"';
    my $activatecol_status = system($activatecol_cmd)/256;

    if ($activatecol_status != 0)
    {
      print STDERR "Error: Failed to run: $activatecol_cmd\n";
      print STDERR "       $!\n" if ($! ne '');
      exit(-1);
    }
  }
}
835
836# @function deinit()
837#
# Closes the OUT and FAILLOG handles, each only when the matching
# 'close_out' / 'close_faillog' flag is set on the object.
sub deinit
{
  my ($this) = @_;

  close OUT if ($this->{'close_out'});
  close FAILLOG if ($this->{'close_faillog'});
}
851# deinit()
852
8531;
Note: See TracBrowser for help on using the browser.