root/main/trunk/greenstone2/perllib/buildcolutils.pm @ 29096

Revision 29096, 25.8 KB (checked in by kjdon, 6 years ago)

new argument to print_txt_usage. Pass 1 if you don't want the output paged. We use this when there has been an error and we are outputting the options before quitting the import/build. If the output is paged, then the die doesn't end up getting through to the top level program. So for full-rebuild, if the import died because of a parsing error and the output had been paged, then the import was stopped but the system return value was 0, and then it would go on to the next stage, trying to build. So now, if we are stopping because of an error, then don't page the output. Also added a few more (hopefully) helpful error messages

  • Property svn:executable set to *
Line 
1###############################################################################
2#
3# buildcolutils.pm -- index and build the collection. The buildtime counterpart
4#                    of inexport.pl
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package buildcolutils;
29
30#use strict;
31#no strict 'refs';
32
33use File::Basename;
34
35use colcfg;
36use dbutil;
37use util;
38use FileUtils;
39use scriptutil;
40use gsprintf;
41use printusage;
42use parse2;
43
## @method new()
#
#  Parses and validates the arguments to the build process and returns a
#  blessed object holding the parsed option values. The builders that do the
#  actual work are created later, from this object.
#
#  @note Added true incremental support - John Thompson, DL Consulting Ltd.
#  @note There were several bugs regarding using directories other than
#        "import" or "archives" during import and build quashed. - John
#        Thompson, DL Consulting Ltd.
#
#  @param  $argv    Reference to the argument list. It is handed to
#                   parse2::parse(); afterwards the one remaining element
#                   (the collection name) is shifted off it.
#  @param  $options Hash reference describing the script. Its 'args' entry is
#                   passed to the parser and the whole structure is passed to
#                   the PrintUsage functions.
#  @param  $opt_listall_options Accepted for interface compatibility; not
#                   used by this constructor.
#  @return The blessed object. When the 'xml' option is set the usage is
#          printed as XML and the object is returned early, without a
#          collection name or output handle being set up.
#
#  Dies (after printing the usage) when parsing fails, when -h is given, when
#  the number of leftover arguments is not exactly one, or when the requested
#  output file cannot be opened.
#
sub new
{
  my $class = shift(@_);
  my ($argv, $options, $opt_listall_options) = @_;

  my $self = {'builddir' => undef,
              'buildtype' => undef,
              'close_faillog' => 0,
              'close_out' => 0,
              'mode' => '',
              'orthogonalbuildtypes' => undef,
              'realbuilddir' => undef,
              'textindex' => '',
              'xml' => 0
             };

  # general options available to all plugins
  my $arguments = $options->{'args'};
  my $intArgLeftinAfterParsing = &parse2::parse($argv, $arguments, $self, "allow_extra_options");
  # If parse returns -1 then something has gone wrong
  if ($intArgLeftinAfterParsing == -1)
  {
    # third argument 1 => don't page the output, so the die below reaches
    # the top level program
    &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    print STDERR "Something went wrong during parsing the arguments. Scroll up for details.\n";
    die "\n";
  }

  # If $language has been specified, load the appropriate resource bundle
  # (Otherwise, the default resource bundle will be loaded automatically)
  if ($self->{'language'} && $self->{'language'} =~ /\S/)
  {
    &gsprintf::load_language_specific_resource_bundle($self->{'language'});
  }

  # Do we need 'listall' support in buildcol? If so, copy code from inexport
  # later [jmt12]

  # -xml only asks for the usage in XML form: print it and stop here, before
  # any of the checks that need a collection name
  if ($self->{'xml'})
  {
    &PrintUsage::print_xml_usage($options);
    print "\n";
    return bless($self, $class);
  }

  # the gli wants strings to be in UTF-8
  # (the parsed option lives in $self; there is no $gli variable in scope)
  if ($self->{'gli'})
  {
    &gsprintf::output_strings_in_UTF8();
  }

  # If the user specified -h, then we output the usage
  if (@$argv && $argv->[0] =~ /^\-+h/)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}");
    die "\n";
  }

  # now check that we had exactly one leftover arg, which should be
  # the collection name. We don't want to do this earlier, cos
  # -xml arg doesn't need a collection name
  if ($intArgLeftinAfterParsing != 1)
  {
    &PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    print STDERR "There should be one argument left after parsing the script args: the collection name.\n";
    die "\n";
  }

  my $out = $self->{'out'};
  if ($out !~ /^(STDERR|STDOUT)$/i)
  {
    # three-argument open so that the file name can never be read as a mode;
    # die regardless of what the message printer returns
    unless (open(OUT, '>', $out))
    {
      &gsprintf::gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
      die;
    }
    # the handle is passed around by its fully qualified name
    $out = __PACKAGE__ . '::OUT';
    $self->{'close_out'} = 1;
  }
  $out->autoflush(1);
  $self->{'out'} = $out;

  # @ARGV should be only one item, the name of the collection
  $self->{'collection'} = shift(@{$argv});

  return bless($self, $class);
}
# new()
146
147# newCGI()?
148
# @function get_collection
# Accessor: returns the collection name stored on the object under the
# 'collection' key (undef if it has not been set).
#
sub get_collection
{
  my ($self) = @_;
  my $collection_name = $self->{'collection'};
  return $collection_name;
}
# get_collection()
157
# @function read_collection_cfg
# Selects the collection, opens the fail log for appending and reads the
# collection configuration file.
#
# @param  $collection name of the collection, handed to
#                     colcfg::use_collection()
# @param  $options    accepted for interface compatibility; not used here
# @return a two element list: the configuration file name and the
#         configuration as returned by colcfg::read_collection_cfg()
#
# Side effects on $self: sets 'gs_version' ("3" when a site is given,
# otherwise "2"), 'faillog' (name of the open handle), 'faillogname' (path
# of the log file) and 'close_faillog'.
# Dies if the collection cannot be selected or the fail log cannot be opened.
#
sub read_collection_cfg
{
  my $self = shift(@_);
  my ($collection, $options) = @_;

  my $collectdir = $self->{'collectdir'};
  my $site       = $self->{'site'};
  my $out        = $self->{'out'};

  # get and check the collection
  if (($collection = &colcfg::use_collection($site, $collection, $collectdir)) eq "")
  {
    #&PrintUsage::print_txt_usage($options, "{buildcol.params}", 1);
    die "\n";
  }

  # set gs_version 2/3: a non-empty site means gs3
  $self->{'gs_version'} = ((defined $site) && ($site ne "")) ? "3" : "2";

  # add collection's perllib dir into include path in case we have collection
  # specific modules
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib'));
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'classify'));
  &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'plugins'));

  # check that we can open the faillog
  my $faillogname = $self->{'faillog'};
  if (!defined $faillogname || $faillogname eq "")
  {
    $faillogname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
  }
  # note that we're appending to the faillog here (import.pl clears it each time)
  # this could potentially create a situation where the faillog keeps being added
  # to over multiple builds (if the import process is being skipped)
  unless (open(FAILLOG, '>>', $faillogname))
  {
    &gsprintf::gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillogname);
    die;
  }
  # the handle is passed around by its fully qualified name; the file path is
  # remembered separately so that 'faillogname' really is the name of the log
  my $faillog = __PACKAGE__ . '::FAILLOG';
  $faillog->autoflush(1);
  $self->{'faillog'} = $faillog;
  $self->{'faillogname'} = $faillogname;
  $self->{'close_faillog'} = 1;

  # Read in the collection configuration file.
  my $gs_mode = "gs".$self->{'gs_version'}; #gs2 or gs3
  my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
  my $collect_cfg = &colcfg::read_collection_cfg($config_filename, $gs_mode);

  return ($config_filename, $collect_cfg);
}
# read_collection_cfg()
214
# @function set_collection_options
# This function copies across values for arguments from the collection
# configuration file if they are not already provided by the user, then
# sets reasonable defaults for any required arguments that remain without
# a value.
#
# @param $collectcfg hash reference holding the collection configuration.
#                    Its 'infodbtype' entry is filled in with the default
#                    when missing.
#
# All results are written into $self.
sub set_collection_options
{
  my $self = shift @_;
  my ($collectcfg) = @_;

  # If the infodbtype value wasn't defined in the collect.cfg file, use the default
  if (!defined($collectcfg->{'infodbtype'}))
  {
    $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
  }
  # - just so I don't have to pass collectcfg around as well
  $self->{'infodbtype'} = $collectcfg->{'infodbtype'};

  if ($self->{'verbosity'} !~ /\d+/)
  {
    if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/)
    {
      $self->{'verbosity'} = $collectcfg->{'verbosity'};
    }
    else
    {
      $self->{'verbosity'} = 2; # the default
    }
  }

  # we use searchtype for determining buildtype, but for old versions, use buildtype
  if (defined $collectcfg->{'buildtype'})
  {
    $self->{'buildtype'} = $collectcfg->{'buildtype'};
  }
  elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'})
  {
    $self->{'buildtype'} = "mgpp";
  }
  else
  {
    $self->{'buildtype'} = "mg"; #mg is the default
  }

  if ($self->{'buildtype'} eq "mgpp" && defined $collectcfg->{'textcompress'})
  {
    $self->{'textindex'} = $collectcfg->{'textcompress'};
  }

  # is it okay to always clobber or possible remain undefined? [jmt12]
  if (defined $collectcfg->{'orthogonalbuildtypes'})
  {
    $self->{'orthogonalbuildtypes'} = $collectcfg->{'orthogonalbuildtypes'};
  }

  # - resolve (and possibly set to default) archivedir
  if (defined $collectcfg->{'archivedir'} && $self->{'archivedir'} eq "")
  {
    $self->{'archivedir'} = $collectcfg->{'archivedir'};
  }
  # Modified so that the archivedir, if provided as an argument, is made
  # absolute if it isn't already
  if ($self->{'archivedir'} eq "")
  {
    $self->{'archivedir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "archives");
  }
  else
  {
    $self->{'archivedir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'archivedir'});
  }
  # End Mod
  $self->{'archivedir'} = &FileUtils::sanitizePath($self->{'archivedir'});

  # - resolve (and possibly set to default) builddir
  if (defined $collectcfg->{'builddir'} && $self->{'builddir'} eq "")
  {
    $self->{'builddir'} = $collectcfg->{'builddir'};
  }
  if ($self->{'builddir'} eq "")
  {
    $self->{'builddir'} = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, 'building');
    # The incremental flag and the output handle are read from $self: the
    # lexical $incremental declared further down is not yet in scope here,
    # and no $out variable exists in this function at all.
    if ($self->{'incremental'})
    {
      &gsprintf::gsprintf($self->{'out'}, "{buildcol.incremental_default_builddir}\n");
    }
  }
  else
  {
    # make absolute if not already
    $self->{'builddir'} = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $self->{'builddir'});
  }

  $self->{'builddir'} = &FileUtils::sanitizePath($self->{'builddir'});

  if (defined $collectcfg->{'cachedir'} && $self->{'cachedir'} eq "")
  {
    $self->{'cachedir'} = $collectcfg->{'cachedir'};
  }

  if ($self->{'maxdocs'} !~ /\-?\d+/)
  {
    if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/)
    {
      $self->{'maxdocs'} = $collectcfg->{'maxdocs'};
    }
    else
    {
      $self->{'maxdocs'} = -1; # the default
    }
  }

  # always clobbers? [jmt12]
  if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/)
  {
    $self->{'maxnumeric'} = $collectcfg->{'maxnumeric'};
  }
  # values outside 4..512 fall back to 4
  if ($self->{'maxnumeric'} < 4 || $self->{'maxnumeric'} > 512)
  {
    $self->{'maxnumeric'} = 4;
  }

  if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i)
  {
    $self->{'debug'} = 1;
  }

  if ($self->{'mode'} !~ /^(all|compress_text|build_index|infodb|extra)$/)
  {
    if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb|extra)$/)
    {
      $self->{'mode'} = $collectcfg->{'mode'};
    }
    else
    {
      $self->{'mode'} = "all"; # the default
    }
  }

  # Presumably 'index' from the collect.cfg still works [jmt12]
  if (defined $collectcfg->{'index'} && $self->{'indexname'} eq "")
  {
    $self->{'indexname'} = $collectcfg->{'index'};
  }
  # - 'index' from the command line doesn't make it through parsing so I
  # renamed this option 'indexname' [jmt12]
  if (defined $collectcfg->{'indexname'} && $self->{'indexname'} eq "")
  {
    $self->{'indexname'} = $collectcfg->{'indexname'};
  }
  # - we may also define the index level to build now [jmt12]
  if (defined $collectcfg->{'indexlevel'} && $self->{'indexlevel'} eq "")
  {
    $self->{'indexlevel'} = $collectcfg->{'indexlevel'};
  }

  # Boolean switches: a "true" in the configuration turns the switch on when
  # the user has not already done so
  foreach my $flag ('no_text', 'no_strip_html', 'store_metadata_coverage', 'remove_empty_classifications')
  {
    if (defined $collectcfg->{$flag} && $self->{$flag} == 0 && $collectcfg->{$flag} =~ /^true$/i)
    {
      $self->{$flag} = 1;
    }
  }

  if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i)
  {
    $self->{'gli'} = 1;
  }
  if (!defined $self->{'gli'})
  {
    $self->{'gli'} = 0;
  }

  # Both section options take the configured value when the user gave none,
  # and anything other than the three recognised values becomes 'never'
  foreach my $option ('sections_index_document_metadata', 'sections_sort_on_document_metadata')
  {
    if ($self->{$option} !~ /\S/ && defined $collectcfg->{$option})
    {
      $self->{$option} = $collectcfg->{$option};
    }
    if ($self->{$option} !~ /^(never|always|unless_section_metadata_exists)$/)
    {
      $self->{$option} = 'never';
    }
  }

  my ($removeold, $keepold, $incremental, $incremental_mode)
      = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},
                                                 $self->{'incremental'}, 'building',
                                                 $collectcfg);
  $self->{'removeold'}        = $removeold;
  $self->{'keepold'}          = $keepold;
  $self->{'incremental'}      = $incremental;
  $self->{'incremental_mode'} = $incremental_mode;

  # New argument to track whether build is incremental
  if (!defined $self->{'incremental'})
  {
    $self->{'incremental'} = 0;
  }

  #set the text index
  if (($self->{'buildtype'} eq 'mgpp') || ($self->{'buildtype'} eq 'lucene') || ($self->{'buildtype'} eq 'solr'))
  {
    if ($self->{'textindex'} eq '')
    {
      $self->{'textindex'} = 'text';
    }
  }
  else
  {
    $self->{'textindex'} = 'section:text';
  }
}
# set_collection_options()
461
# @function prepare_builders
# Works out which builder class(es) to use, loads them, constructs one
# builder object per class and runs the init phase on each.
#
# @param  $config_filename accepted for interface compatibility; not used
# @param  $collectcfg      accepted for interface compatibility; not used
# @return reference to the array of initialised builder objects (the main
#         builder first, unless the mode is "extra", followed by one builder
#         per orthogonal build type)
#
# Side effects: sets 'realarchivedir' and 'realbuilddir' on $self, creates
# the real build directory and, when a cache directory is in use, the cached
# archives/building directories (synchronising the archives into the cache).
# Dies if a builder module cannot be loaded or its constructor fails.
#
sub prepare_builders
{
  my $self = shift @_;
  my ($config_filename,$collectcfg) = @_;

  my $archivedir  = $self->{'archivedir'};
  my $builddir    = $self->{'builddir'};
  my $buildtype   = $self->{'buildtype'};
  my $cachedir    = $self->{'cachedir'};
  my $collection  = $self->{'collection'};
  my $debug       = $self->{'debug'};
  my $faillog     = $self->{'faillog'};
  my $gli         = $self->{'gli'};
  my $incremental = $self->{'incremental'};
  my $incremental_mode = $self->{'incremental_mode'};
  my $keepold     = $self->{'keepold'};
  my $maxdocs     = $self->{'maxdocs'};
  my $maxnumeric  = $self->{'maxnumeric'};
  my $no_strip_html = $self->{'no_strip_html'};
  my $no_text     = $self->{'no_text'};
  my $orthogonalbuildtypes = $self->{'orthogonalbuildtypes'};
  my $out         = $self->{'out'};
  my $remove_empty_classifications = $self->{'remove_empty_classifications'};
  my $sections_index_document_metadata = $self->{'sections_index_document_metadata'};
  my $sections_sort_on_document_metadata = $self->{'sections_sort_on_document_metadata'};
  my $site        = $self->{'site'};
  my $store_metadata_coverage = $self->{'store_metadata_coverage'};
  my $verbosity   = $self->{'verbosity'};

  if ($gli)
  {
    print STDERR "<Build>\n";
  }

  my ($realarchivedir, $realbuilddir);
  # update the archive cache if needed
  if ($cachedir)
  {
    if ($verbosity >= 1)
    {
      &gsprintf::gsprintf($out, "{buildcol.updating_archive_cache}\n")
    }

    $cachedir =~ s/[\\\/]+$//;
    # append collect/<collection> unless the cache path already contains it;
    # the collection name is quoted so that characters such as '+' or '.'
    # in it are matched literally
    if ($cachedir !~ /collect[\/\\]\Q$collection\E/)
    {
      $cachedir = &FileUtils::filenameConcatenate($cachedir, 'collect', $collection);
    }

    $realarchivedir = &FileUtils::filenameConcatenate($cachedir, 'archives');
    $realbuilddir = &FileUtils::filenameConcatenate($cachedir, 'building');
    &FileUtils::makeAllDirectories($realarchivedir);
    &FileUtils::makeAllDirectories($realbuilddir);
    &FileUtils::synchronizeDirectory($archivedir, $realarchivedir, $verbosity);
  }
  else
  {
    $realarchivedir = $archivedir;
    $realbuilddir = $builddir;
  }
  $self->{'realarchivedir'} = $realarchivedir;
  $self->{'realbuilddir'} = $realbuilddir;

  # build it in realbuilddir
  &FileUtils::makeAllDirectories($realbuilddir);

  my ($buildertype, $builderdir);
  # if a builder class has been created for this collection, use it
  # otherwise, use the builder named after the build type
  if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
    $buildertype = "custombuilder";
  }
  elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    $buildertype = "custombuilder";
  }
  elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm")
  {
    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    $buildertype = $collection . 'builder';
  }
  else
  {
    $builderdir = undef;
    if ($buildtype ne '')
    {
      # caters for extension-based build types, such as 'solr'
      $buildertype = $buildtype . 'builder';
    }
    else
    {
      # Default to mgpp
      $buildertype = 'mgppbuilder';
    }
  }

  # check for extension specific builders
  # (that will then be run after main builder.pm)
  my @builderdir_list = ($builderdir);
  my @buildertype_list = ($buildertype);

  if ($self->{'mode'} eq "extra")
  {
    # knock out the main builder type, by resetting the lists to be empty
    @builderdir_list = ();
    @buildertype_list = ();
  }

  if (defined $orthogonalbuildtypes)
  {
    foreach my $obt (@$orthogonalbuildtypes)
    {
      push(@builderdir_list,undef); # rely on @INC to find it
      push(@buildertype_list,$obt."Builder");
    }
  }

  # Set up array of the main builder.pm, followed by any ones
  # from the extension folders

  my $num_builders = scalar(@buildertype_list);
  my @builders = ();

  for (my $i=0; $i<$num_builders; $i++)
  {
    my $this_buildertype = $buildertype_list[$i];
    my $this_builderdir  = $builderdir_list[$i];

    if ((defined $this_builderdir) && ($this_builderdir ne ""))
    {
      require "$this_builderdir/$this_buildertype.pm";
    }
    else
    {
      require "$this_buildertype.pm";
    }

    # Call the constructor as a class method on the class name. This avoids
    # building Perl source as a string and cannot be mistaken for a call to
    # this package's own new().
    my $this_builder = eval {
      $this_buildertype->new($site, $collection,
                             $realarchivedir, $realbuilddir, $verbosity,
                             $maxdocs, $debug, $keepold, $incremental, $incremental_mode,
                             $remove_empty_classifications,
                             $out, $no_text, $faillog, $gli);
    };
    die "$@" if $@;

    push(@builders,$this_builder);
  }

  # Init phase for builders (only after every builder has been constructed)
  for (my $i=0; $i<$num_builders; $i++)
  {
    my $this_buildertype = $buildertype_list[$i];
    my $this_builder     = $builders[$i];

    $this_builder->init();
    $this_builder->set_maxnumeric($maxnumeric);

    if (($this_buildertype eq "mgppbuilder") && $no_strip_html)
    {
      $this_builder->set_strip_html(0);
    }

    if ($sections_index_document_metadata ne "never")
    {
      $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
    }
    if (($this_buildertype eq "lucenebuilder" || $this_buildertype eq "solrbuilder") && $sections_sort_on_document_metadata ne "never")
    {
      $this_builder->set_sections_sort_on_document_metadata($sections_sort_on_document_metadata);
    }

    if ($store_metadata_coverage)
    {
      $this_builder->set_store_metadata_coverage(1);
    }
  }
  return \@builders;
}
# prepare_builders()
650
# @function build_collection
# Runs the build passes selected by the 'mode' option on every builder.
#
# @param $builders reference to the array of builder objects
#
# For "all" and "extra" the passes are compress_text, build_indexes,
# make_infodatabase and collect_specific; each pass is run on every builder
# before the next pass starts. The modes "compress_text", "build_index" and
# "infodb" run just that one pass. Any other mode prints the unknown-mode
# message to STDERR and dies.
#
sub build_collection
{
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  my $indexlevel  = $self->{'indexlevel'};
  my $indexname   = $self->{'indexname'};
  my $mode        = $self->{'mode'};
  my $textindex   = $self->{'textindex'};

  # Each loop uses its own lexical loop variable, so a builder method that
  # clobbers $_ cannot damage the list of builders.
  if ($mode =~ /^(all|extra)$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->compress_text($textindex);
    }
    # - we pass the required indexname and indexlevel (if specified) to the
    #   processor [jmt12]
    foreach my $builder (@builders)
    {
      $builder->build_indexes($indexname, $indexlevel);
    }
    foreach my $builder (@builders)
    {
      $builder->make_infodatabase();
    }
    foreach my $builder (@builders)
    {
      $builder->collect_specific();
    }
  }
  elsif ($mode =~ /^compress_text$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->compress_text($textindex);
    }
  }
  elsif ($mode =~ /^build_index$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->build_indexes($indexname, $indexlevel);
    }
  }
  elsif ($mode =~ /^infodb$/i)
  {
    foreach my $builder (@builders)
    {
      $builder->make_infodatabase();
    }
  }
  else
  {
    # report, then stop no matter what the message printer returned
    &gsprintf::gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
    die;
  }
}
# build_collection()
697
# @function build_auxiliary_files
# Asks every builder to make its auxiliary files, unless the 'debug' option
# is set, in which case nothing is done.
#
# @param $builders reference to the array of builder objects
#
sub build_auxiliary_files
{
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  if (!$self->{'debug'})
  {
    # a plain loop with a lexical loop variable: the calls are made for
    # their side effects only, and a method that clobbers $_ cannot affect
    # the list of builders
    foreach my $builder (@builders)
    {
      $builder->make_auxiliary_files();
    }
  }
}
# build_auxiliary_files()
710
# @function complete_builders
# Shuts the builders down and finishes off the build directory.
#
# @param $builders reference to the array of builder objects
#
# Steps: calls deinit() on every builder; when the build was done in a
# different directory from 'builddir' (and not in debug mode) replaces
# 'builddir' with a copy of the real build directory; copies rss-items.rdf
# from the archives directory into 'builddir' when both exist; and prints the
# closing </Build> tag to STDERR when running under the GLI.
#
sub complete_builders
{
  my $self = shift(@_);
  my @builders = @{shift(@_)};

  foreach my $builder (@builders)
  {
    $builder->deinit();
  }

  if (($self->{'realbuilddir'} ne $self->{'builddir'}) && !$self->{'debug'})
  {
    if ($self->{'verbosity'} >= 1)
    {
      # the output handle lives in $self; there is no $out variable here
      &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_back_cached_build}\n");
    }
    &FileUtils::removeFilesRecursive($self->{'builddir'});
    &FileUtils::copyFilesRecursive($self->{'realbuilddir'}, $self->{'builddir'});
  }

  # for RSS support: Need rss-items.rdf file in index folder
  #  check if a file called rss-items.rdf exists in archives, then copy it into the building folder
  #  so that when building is moved to index, this file will then also be in index as desired
  # NOTE(review): $collection_dir is not used below; the call is kept in case
  # util::resolve_collection_dir() has side effects - confirm and remove.
  my $collection_dir = &util::resolve_collection_dir($self->{'collectdir'},
                                                     $self->{'collection'},
                                                     $self->{'site'});
  my $rss_items_rdf_file = &FileUtils::filenameConcatenate($self->{'archivedir'}, 'rss-items.rdf');
  # @todo FileUtils
  if (defined $self->{'builddir'} && &FileUtils::directoryExists($self->{'builddir'}) && &FileUtils::fileExists($rss_items_rdf_file))
  {
    if ($self->{'verbosity'} >= 1)
    {
      # only the last path component of each directory goes into the message
      my $archivedir_tail = "'".basename($self->{'archivedir'})."'";
      my $builddir_tail =  "'".basename($self->{'builddir'})."'";

      &gsprintf::gsprintf($self->{'out'}, "{buildcol.copying_rss_items_rdf}\n", $archivedir_tail, $builddir_tail);
    }
    &FileUtils::copyFiles($rss_items_rdf_file, $self->{'builddir'});
  }

  if ($self->{'gli'})
  {
    print STDERR "</Build>\n";
  }
}
# complete_builders()
756
# @function activate_collection
# If buildcol.pl was run with -activate, runs activate.pl for the collection
# now that building is complete, passing on the collectdir, builddir, site,
# verbosity, removeold, keepold and incremental options that are set.
#
# Does nothing when the 'activate' option is not set. If activate.pl cannot
# be started or finishes with a non-zero status, an error is printed to
# STDERR and the program exits with -1.
#
sub activate_collection
{
  my $self = shift(@_);

  if ($self->{'activate'})
  {
    my @activate_argv = ();
    push(@activate_argv, '-collectdir', $self->{'collectdir'}) if ($self->{'collectdir'});
    push(@activate_argv, '-builddir', $self->{'builddir'}) if ($self->{'builddir'});
    push(@activate_argv, '-site', $self->{'site'}) if ($self->{'site'});
    # a verbosity of 0 is a real request (be quiet) and must be passed on
    # too, so test for "has a value" rather than for truth
    push(@activate_argv, '-verbosity', $self->{'verbosity'}) if (defined $self->{'verbosity'} && $self->{'verbosity'} ne '');
    push(@activate_argv, '-removeold') if ($self->{'removeold'});
    push(@activate_argv, '-keepold') if ($self->{'keepold'});
    push(@activate_argv, '-incremental') if ($self->{'incremental'});

    # every argument is wrapped in double quotes for the shell
    my $quoted_argv = join(' ', map { "\"$_\"" } @activate_argv);
    my $activatecol_cmd = '"' . &util::get_perl_exec(). '" -S activate.pl ' . $quoted_argv . ' "' . $self->get_collection() . '"';

    my $system_result = system($activatecol_cmd);
    # remember the OS error straight away, before anything else can reset it
    my $os_error = "$!";
    my $activatecol_status = $system_result/256;

    if ($activatecol_status != 0)
    {
      print STDERR "Error: Failed to run: $activatecol_cmd\n";
      # $! only describes this failure when the command could not be started
      # at all (system returned -1); otherwise it is left over from earlier
      print STDERR "       $os_error\n" if ($system_result == -1 && $os_error ne '');
      exit(-1);
    }
  }
}
# activate_collection()
787
# @function deinit()
# Closes the OUT and FAILLOG handles, each only when the matching
# 'close_out' / 'close_faillog' flag is set on the object.
#
sub deinit
{
  my ($self) = @_;

  close(OUT) if ($self->{'close_out'});
  close(FAILLOG) if ($self->{'close_faillog'});
}
# deinit()
804
8051;
Note: See TracBrowser for help on using the browser.