source: gsdl/trunk/perllib/basebuilder.pm@ 20095

Last change on this file since 20095 was 20095, checked in by kjdon, 15 years ago

changes for cfgread4gs3 renaming, and to make this look for buildConfig.xml instead of only build.cfg

  • Property svn:keywords set to Author Date Id Revision
File size: 22.5 KB
Line 
1###########################################################################
2#
3# basebuilder.pm -- base class for collection builders
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package basebuilder;
27
28use strict;
29no strict 'refs'; # allow filehandles to be variables and viceversa
30
31use classify;
32use cfgread;
33use colcfg;
34use dbutil;
35use plugin;
36use util;
37
38
# Switch autoflush on for STDOUT and STDERR as soon as this module has been
# compiled, so that mgpp doesn't get out of sync with plugins.
BEGIN {
    foreach my $stream (\*STDOUT, \*STDERR) {
        $stream->autoflush(1);
    }
}

# Switch autoflush back off for both streams when the program exits.
END {
    foreach my $stream (\*STDOUT, \*STDERR) {
        $stream->autoflush(0);
    }
}

# NOTE(review): not referenced anywhere else in this file - presumably read
# by other modules; confirm before changing or removing.
our $maxdocsize = 12000;

# Signifies which flavour of Greenstone is in use: "gs2" (the default) or
# "gs3".  new() overwrites it with the mode reported by
# colcfg::get_collect_cfg_name().
our $gs_mode = "gs2";
55
# Constructor for the collection builder base class.
#
# Arguments after the class name, in order:
#   $collection, $source_dir, $build_dir, $verbosity, $maxdocs, $debug,
#   $keepold, $incremental, $remove_empty_classifications, $outhandle,
#   $no_text, $failhandle, $gli, $disable_OAI
# $outhandle and $failhandle default to STDERR; $no_text, $gli and
# $disable_OAI default to 0.
#
# Reads the collection configuration file (updating the package-level
# $gs_mode as a side effect), loads the plugins and classifiers it lists and
# records the "dontgdbm" fields.  Prints a message to $outhandle and dies if
# no plugins were loaded.  Returns the blessed builder object.
sub new {
    my $class = shift(@_);
    my ($collection, $source_dir, $build_dir, $verbosity,
        $maxdocs, $debug, $keepold, $incremental,
        $remove_empty_classifications,
        $outhandle, $no_text, $failhandle, $gli, $disable_OAI) = @_;

    # fill in defaults for the optional arguments
    $outhandle  = *STDERR if !defined $outhandle;
    $no_text    = 0       if !defined $no_text;
    $failhandle = *STDERR if !defined $failhandle;
    $gli        = 0       if !defined $gli;

    # disable_OAI applies to greenstone 3 only; it is only passed on to
    # &colcfg::write_build_cfg_xml when the buildConfig.xml is written
    $disable_OAI = 0 if !defined $disable_OAI;

    # create a builder object
    my $self = {
        'collection'                   => $collection,
        'source_dir'                   => $source_dir,
        'build_dir'                    => $build_dir,
        'verbosity'                    => $verbosity,
        'maxdocs'                      => $maxdocs,
        'debug'                        => $debug,
        'keepold'                      => $keepold,
        'incremental'                  => $incremental,
        'remove_empty_classifications' => $remove_empty_classifications,
        'outhandle'                    => $outhandle,
        'no_text'                      => $no_text,
        'failhandle'                   => $failhandle,
        'notbuilt'                     => {},    # indexes not built
        'gli'                          => $gli,
        'disable_OAI'                  => $disable_OAI,
    };
    bless $self, $class;

    # Read in the collection configuration file.
    my $colcfgname;
    ($colcfgname, $gs_mode) = &colcfg::get_collect_cfg_name($outhandle);
    if ($gs_mode eq "gs2") {
        $self->{'collect_cfg'} = &colcfg::read_collect_cfg($colcfgname);
    }
    elsif ($gs_mode eq "gs3") {
        $self->{'collect_cfg'} = &colcfg::read_collection_cfg_xml($colcfgname);

        # gs3 only: keep a second copy of the configuration for
        # &colcfg::write_build_cfg_xml (see make_auxiliary_files), because
        # $self->{'collect_cfg'}->{'classify'} gets modified during the call
        # to &classify::load_classifiers.
        $self->{'collect_cfg_preserve'} = &colcfg::read_collection_cfg_xml($colcfgname);
    }

    # get the database type for this collection from the collect.cfg file
    # (may be undefined, in which case the default type is used)
    $self->{'infodbtype'} = $self->{'collect_cfg'}->{'infodbtype'} || &dbutil::get_default_infodb_type();

    my $collect_cfg = $self->{'collect_cfg'};

    # get the list of plugins for this collection
    my $plugins = defined $collect_cfg->{'plugin'} ? $collect_cfg->{'plugin'} : [];

    # build up the extra global options for the plugins
    my @global_opts = ();
    my $separate_cjk = $collect_cfg->{'separate_cjk'};
    if (defined $separate_cjk && $separate_cjk =~ /^true$/i) {
        push @global_opts, "-separate_cjk";
    }

    # load all the plugins
    $self->{'pluginfo'} = &plugin::load_plugins($plugins, $verbosity, $outhandle, $failhandle, \@global_opts, $keepold);

    if (scalar(@{$self->{'pluginfo'}}) == 0) {
        print $outhandle "No plugins were loaded.\n";
        die "\n";
    }

    # get the list of classifiers for this collection
    my $classifiers = defined $collect_cfg->{'classify'} ? $collect_cfg->{'classify'} : [];

    # load all the classifiers
    $self->{'classifiers'} = &classify::load_classifiers($classifiers, $build_dir, $outhandle);

    # load up any dontdb fields
    $self->{'dontdb'} = {};
    if (defined($collect_cfg->{'dontgdbm'})) {
        $self->{'dontdb'}->{$_} = 1 foreach @{$collect_cfg->{'dontgdbm'}};
    }

    $self->{'maxnumeric'} = 4;
    return $self;
}
144
# Second-stage initialisation.  This work was moved out of new() so that
# subclass methods can be used (generate_index_list and default_buildproc
# are called here but are not defined in this file).
#
# Steps, in order:
#   1. expands the configured indexes by subcollection and by language and
#      removes duplicate index specifications;
#   2. locates and loads the buildproc class (collection-specific custom
#      class first, then a collect-dir wide one, then the default) and
#      stores an instance in $self->{'buildproc'};
#   3. applies the index options;
#   4. unless debugging or keeping the old build, removes and recreates the
#      build directory together with its text subdirectory;
#   5. for incremental builds, calls init_for_incremental_build().
#
# Dies if the buildproc module cannot be loaded or constructed.
sub init {
    my $self = shift(@_);

    $self->generate_index_list();
    my $collect_cfg = $self->{'collect_cfg'};

    my $indexes = $collect_cfg->{'indexes'};
    if (defined $indexes) {
        # sort out subcollection indexes
        if (defined $collect_cfg->{'indexsubcollections'}) {
            my @expanded = ();
            foreach my $subcollection (@{$collect_cfg->{'indexsubcollections'}}) {
                push(@expanded, map { "$_:$subcollection" } @$indexes);
            }
            $collect_cfg->{'indexes'} = \@expanded;
        }

        # sort out language subindexes
        if (defined $collect_cfg->{'languages'}) {
            $indexes = $collect_cfg->{'indexes'};
            # when there are no subcollections an empty subcollection field
            # is added, hence the doubled separator
            my $separator = defined($collect_cfg->{'indexsubcollections'}) ? ":" : "::";
            my @expanded = ();
            foreach my $language (@{$collect_cfg->{'languages'}}) {
                push(@expanded, map { $_ . $separator . $language } @$indexes);
            }
            $collect_cfg->{'indexes'} = \@expanded;
        }
    }

    if (defined($collect_cfg->{'indexes'})) {
        # make sure that the same index isn't specified more than once
        # (the first occurrence wins, order is preserved)
        my %seen = ();
        my @unique = grep { !$seen{$_}++ } @{$collect_cfg->{'indexes'}};
        $collect_cfg->{'indexes'} = \@unique;
    } else {
        $collect_cfg->{'indexes'} = [];
    }

    # load up the document processor for building
    # if a buildproc class has been created for this collection, use it
    # otherwise, use the default buildproc
    my $collection = $self->{'collection'};
    my @candidates = (
        ["$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib", "custombuildproc"],
        ["$ENV{'GSDLCOLLECTDIR'}/perllib", "custombuildproc"],
        ["$ENV{'GSDLCOLLECTDIR'}/perllib", "${collection}buildproc"],
    );
    my ($buildprocdir, $buildproctype);
    foreach my $candidate (@candidates) {
        my ($dir, $type) = @$candidate;
        if (-e "$dir/$type.pm") {
            ($buildprocdir, $buildproctype) = ($dir, $type);
            last;
        }
    }
    if (!defined $buildproctype) {
        $buildprocdir = "$ENV{'GSDLHOME'}/perllib";
        $buildproctype = $self->default_buildproc();
    }
    require "$buildprocdir/$buildproctype.pm";

    # Construct the buildproc with an ordinary class method call.  This
    # replaces a string eval using indirect object syntax, which is harder
    # to read and depends on parser heuristics.  Errors are still re-thrown
    # as strings, exactly as before.
    eval {
        $self->{'buildproc'} = $buildproctype->new($self->{'collection'},
                                                   $self->{'source_dir'},
                                                   $self->{'build_dir'},
                                                   $self->{'keepold'},
                                                   $self->{'verbosity'},
                                                   $self->{'outhandle'});
    };
    die "$@" if $@;

    $self->generate_index_options();

    if (!$self->{'debug'} && !$self->{'keepold'}) {
        # remove any old builds
        &util::rm_r($self->{'build_dir'});
        &util::mk_all_dir($self->{'build_dir'});

        # make the text directory
        my $textdir = "$self->{'build_dir'}/text";
        &util::mk_all_dir($textdir);
    }

    if ($self->{'incremental'}) {
        # some classes may need to do some additional initialisation
        $self->init_for_incremental_build();
    }

}
237
# Hook called by init() when an incremental build was requested.
# Does nothing here; override it in a subclass that needs additional
# initialisation for an incremental build.
sub init_for_incremental_build {
    my $self = shift;
}
243
# Tears the builder down by handing the loaded plugin list and the document
# processor to plugin::deinit.
sub deinit {
    my ($self) = @_;

    my $pluginfo  = $self->{'pluginfo'};
    my $buildproc = $self->{'buildproc'};
    return &plugin::deinit($pluginfo, $buildproc);
}
249
# Works out the index options from the collection configuration.
# Currently only looks for an "indexoptions" entry matching separate_cjk;
# the resulting flag (1 or 0) is passed to the buildproc and also stored in
# $self->{'separate_cjk'} so that it can be recorded in build.cfg.
sub generate_index_options {
    my ($self) = @_;

    my $index_options = $self->{'collect_cfg'}->{'indexoptions'};
    my $separate_cjk = 0;
    if (defined($index_options)) {
        $separate_cjk = 1 if grep { $_ =~ /separate_cjk/ } @$index_options;
    }

    # set this for building
    $self->{'buildproc'}->set_separate_cjk($separate_cjk);
    # record it for build.cfg
    $self->{'separate_cjk'} = $separate_cjk;
}
267
# Forwards the given setting to the buildproc's method of the same name.
#   $index - value passed through unchanged
sub set_sections_index_document_metadata {
    my ($self, $index) = @_;

    $self->{'buildproc'}->set_sections_index_document_metadata($index);
}
274
# Stores the maxnumeric setting on the builder (new() initialises it to 4);
# make_auxiliary_files later copies it into the build configuration.
sub set_maxnumeric {
    my ($self, $maxnumeric) = @_;

    $self->{'maxnumeric'} = $maxnumeric;
}
# Records the strip_html setting on the builder and passes the same value on
# to the buildproc.
sub set_strip_html {
    my ($self, $strip) = @_;

    $self->{'strip_html'} = $strip;
    $self->{'buildproc'}->set_strip_html($strip);
}
288
# Placeholder for the text compression stage: subclasses are expected to
# override this.  The base version only reports to STDERR that it was
# called and returns nothing.
#   $textindex - accepted for interface compatibility; unused here
sub compress_text {
    my $self = shift (@_);
    my ($textindex) = @_;

    # The message is newline-terminated, like the other "implement in
    # subclass" stubs in this file, so that it does not run into whatever
    # is written to STDERR next.
    print STDERR "compress_text() should be implemented in subclass!!\n";
    return;
}
296
297
# Builds the collection's indexes.
#   $indexname - optional; when it contains at least one word character only
#                that index is built, otherwise every index listed in the
#                collection configuration is considered.
# The index-to-directory mapping is (re)created first and stored in
# $self->{'index_mapping'}.  Indexes rejected by want_built() are skipped.
# build_indexes_extra() is called once at the end.
sub build_indexes {
    my ($self, $indexname) = @_;
    my $outhandle = $self->{'outhandle'};

    my $indexes;
    if (defined $indexname && $indexname =~ /\w/) {
        $indexes = [$indexname];
    } else {
        $indexes = $self->{'collect_cfg'}->{'indexes'};
    }

    # create the mapping between the index descriptions
    # and their directory names (includes subcolls and langs)
    $self->{'index_mapping'} = $self->create_index_mapping($indexes);

    # build each of the indexes
    foreach my $index (@$indexes) {
        unless ($self->want_built($index)) {
            if ($self->{'verbosity'} >= 1) {
                print $outhandle "\n*** ignoring index $index\n";
            }
            next;
        }

        if ($self->{'verbosity'} >= 1) {
            print $outhandle "\n*** building index $index in subdirectory " .
                "$self->{'index_mapping'}->{$index}\n";
        }
        print STDERR "<Stage name='Index' source='$index'>\n" if $self->{'gli'};
        $self->build_index($index);
    }

    $self->build_indexes_extra();

}
329
# Hook called by build_indexes() after all the indexes have been built.
# Does nothing here; override it in a subclass to do extra work at that
# point.
sub build_indexes_extra {
    my $self = shift;

}
336
# Placeholder for building a single index: subclasses are expected to
# override this.  The base version only reports to STDERR that it was
# called and returns nothing.
#   $index - the index specification; unused here
sub build_index {
    my ($self, $index) = @_;

    print STDERR "build_index should be implemented in subclass\n";
    return;
}
344
345
346
# Creates the collection's info database and processes associated files.
#
# Outline:
#   - makes sure the "text" and "assoc" directories exist in the build dir;
#   - initialises the classifiers and, when keeping the old build,
#     reconstructs the metadata of the existing documents from the database;
#   - opens the info database for writing (in debug mode STDOUT is used
#     instead and is not closed afterwards);
#   - configures the buildproc for 'infodb' mode, feeds it the reconstructed
#     documents and then everything the plugins read from the source dir;
#   - writes the collection metadata, the classification information and the
#     "browselist" entry listing all documents.
#
# Dies if the info database write handle cannot be opened.  GLI stage
# markers are printed to STDERR when $self->{'gli'} is set.
sub make_infodatabase {
    my $self = shift(@_);
    my $outhandle = $self->{'outhandle'};

    print STDERR "BuildDir: $self->{'build_dir'}\n";

    my $textdir = &util::filename_cat($self->{'build_dir'}, "text");
    my $assocdir = &util::filename_cat($self->{'build_dir'}, "assoc");
    foreach my $dir ($textdir, $assocdir) {
        &util::mk_all_dir($dir);
    }

    # Get info database file path
    my $infodbtype = $self->{'infodbtype'};
    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $self->{'collection'}, $textdir);

    if ($self->{'verbosity'} >= 1) {
        print $outhandle "\n*** creating the info database and processing associated files\n";
    }
    print STDERR "<Stage name='CreateInfoData'>\n" if $self->{'gli'};

    # init all the classifiers
    &classify::init_classifiers($self->{'classifiers'});

    my $reconstructed_docs = undef;
    if ($self->{'keepold'}) {
        # reconstruct doc_obj metadata from database for all docs
        $reconstructed_docs = &classify::reconstruct_doc_objs_metadata($infodbtype, $infodb_file_path);
    }

    # set up the document processor
    my $infodb_handle;
    if ($self->{'debug'}) {
        $infodb_handle = *STDOUT;
    }
    else {
        $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $infodb_file_path);
        unless (defined($infodb_handle)) {
            print STDERR "<FatalError name='NoRunText2DB'/>\n</Stage>\n" if $self->{'gli'};
            die "builder::make_infodatabase - couldn't open infodb write handle\n";
        }
    }

    my $buildproc = $self->{'buildproc'};
    $buildproc->set_infodbtype($infodbtype);
    $buildproc->set_output_handle($infodb_handle);
    $buildproc->set_mode('infodb');
    $buildproc->set_assocdir($assocdir);
    $buildproc->set_dontdb($self->{'dontdb'});
    $buildproc->set_classifiers($self->{'classifiers'});
    $buildproc->set_indexing_text(0);
    $buildproc->set_store_text(1);
    $buildproc->set_store_metadata_coverage($self->{'collect_cfg'}->{'store_metadata_coverage'});

    # make_infodatabase needs full reset even for incremental build
    # as incremental works by reconstructing all docs from the database and
    # then adding in the new ones
    $buildproc->zero_reset();

    $buildproc->{'mdprefix_fields'} = {};

    if ($self->{'keepold'}) {
        # create flat classify structure, ready for new docs to be added
        foreach my $doc_obj (@$reconstructed_docs) {
            print $outhandle " Adding reconstructed ", $doc_obj->get_OID(), " into classify structures\n";
            $buildproc->process($doc_obj, undef);
        }
    }

    &plugin::read($self->{'pluginfo'}, $self->{'source_dir'},
                  "", {}, {}, $buildproc, $self->{'maxdocs'}, 0, $self->{'gli'});

    # this has changed to only output collection meta if its
    # not in the config file
    $self->output_collection_meta($infodb_handle);

    # output classification information
    &classify::output_classify_info($self->{'classifiers'}, $infodbtype, $infodb_handle,
                                    $self->{'remove_empty_classifications'},
                                    $self->{'gli'});

    # Output classifier reverse lookup, used in incremental deletion
    ####&classify::print_reverse_lookup($infodb_handle);

    # output doclist
    my @doc_list = $buildproc->get_doc_list();
    my $browselist_infodb = {
        'hastxt'      => [ "0" ],
        'childtype'   => [ "VList" ],
        'numleafdocs' => [ scalar(@doc_list) ],
        'thistype'    => [ "Invisible" ],
        'contains'    => [ join(";", @doc_list) ],
    };
    &dbutil::write_infodb_entry($infodbtype, $infodb_handle, "browselist", $browselist_infodb);

    # in debug mode the handle is STDOUT, which must stay open
    &dbutil::close_infodb_write_handle($infodbtype, $infodb_handle) unless $self->{'debug'};

    print STDERR "</Stage>\n" if $self->{'gli'};
}
442
# Writes the build configuration file for the collection: build.cfg in gs2
# mode, buildConfig.xml in gs3 mode.
#
# Starts from $self->{'build_cfg'} when a subclass has already put values
# there, then adds the build date, build type, index stem, document,
# section and byte counts, the index / subcollection / language maps, the
# list of indexes that were not built, maxnumeric and the info database
# type.  build_cfg_extra() is called just before the file is written so
# that subclasses can add more.  GLI stage markers are printed to STDERR
# when $self->{'gli'} is set.
sub make_auxiliary_files {
    my $self = shift(@_);

    # subclasses may have already defined stuff in here
    my $build_cfg = defined $self->{'build_cfg'} ? $self->{'build_cfg'} : {};

    my $outhandle = $self->{'outhandle'};

    if ($self->{'verbosity'} >= 1) {
        print $outhandle "\n*** creating auxiliary files \n";
    }
    print STDERR "<Stage name='CreatingAuxilary'>\n" if $self->{'gli'};

    # make sure the build directory exists
    &util::mk_all_dir($self->{'build_dir'});

    # store the build date
    $build_cfg->{'builddate'} = time;
    $build_cfg->{'buildtype'} = $self->{'buildtype'};
    $build_cfg->{'indexstem'} = &util::get_dirsep_tail($self->{'collection'});
    $build_cfg->{'stemindexes'} = $self->{'stemindexes'};
    $build_cfg->{'separate_cjk'} = "true" if $self->{'separate_cjk'};

    # store the number of documents and number of bytes
    $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
    $build_cfg->{'numsections'} = $self->{'buildproc'}->get_num_sections();
    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();

    # store the mapping between the index names and the directory names
    # the index map is used to determine what indexes there are, so any
    # that are not built should not be put into the map.
    my @indexmap = ();
    foreach my $index (@{$self->{'index_mapping'}->{'indexmaporder'}}) {
        next if defined($self->{'notbuilt'}->{$index});
        push(@indexmap, $index . "->" . $self->{'index_mapping'}->{'indexmap'}->{$index});
    }
    $build_cfg->{'indexmap'} = \@indexmap if scalar(@indexmap);

    # the subcollection and language maps are assembled in the same way,
    # subcollections first
    foreach my $kind ('subcollection', 'language') {
        my @entries = ();
        foreach my $name (@{$self->{'index_mapping'}->{"${kind}maporder"}}) {
            push(@entries, $name . "->" .
                 $self->{'index_mapping'}->{"${kind}map"}->{$name});
        }
        $build_cfg->{"${kind}map"} = \@entries if scalar(@entries);
    }

    my @notbuilt = keys %{$self->{'notbuilt'}};
    $build_cfg->{'notbuilt'} = \@notbuilt if scalar(@notbuilt);

    $build_cfg->{'maxnumeric'} = $self->{'maxnumeric'};

    $build_cfg->{'infodbtype'} = $self->{'infodbtype'};

    $self->build_cfg_extra($build_cfg);

    if ($gs_mode eq "gs2") {
        &colcfg::write_build_cfg("$self->{'build_dir'}/build.cfg", $build_cfg);
    }
    elsif ($gs_mode eq "gs3") {
        &colcfg::write_build_cfg_xml("$self->{'build_dir'}/buildConfig.xml", $build_cfg, $self->{'collect_cfg_preserve'}, $self->{'disable_OAI'});
    }

    print STDERR "</Stage>\n" if $self->{'gli'};
}
520
# Hook called by make_auxiliary_files() just before the build configuration
# is written.  Does nothing here; override it in a subclass to add extra
# entries.
#   $build_cfg - reference to the build configuration hash
sub build_cfg_extra {
    my $self = shift;
    my ($build_cfg) = @_;

}
527
528
# Empty hook; does nothing in the base class.
# NOTE(review): presumably meant for collection-specific processing in
# subclasses - confirm against callers.
sub collect_specific {
    my $self = shift;
}
532
# Decides whether an index should be built.
#   $index - the index specification to check
# Each "dontbuild" entry of the collection configuration is used as a
# regular expression that must match the whole index specification.  On the
# first match the index is recorded in $self->{'notbuilt'} and 0 is
# returned; otherwise 1 is returned.
sub want_built {
    my ($self, $index) = @_;

    my $dontbuild = $self->{'collect_cfg'}->{'dontbuild'};
    return 1 unless defined($dontbuild);

    foreach my $checkstr (@$dontbuild) {
        next unless $index =~ /^$checkstr$/;

        $self->{'notbuilt'}->{$index} = 1;
        return 0;
    }

    return 1;
}
548
# Placeholder for creating the mapping between index descriptions and their
# directory names: subclasses are expected to override this.  The base
# version reports to STDERR that it was called and returns a reference to a
# new, empty hash.
#   $indexes - reference to the list of index specifications; unused here
sub create_index_mapping {
    my ($self, $indexes) = @_;

    print STDERR "create_index_mapping should be implemented in subclass\n";
    return {};
}
557
# Returns a two character processed version of a field.
#   - undefined or whitespace-only input gives the empty string;
#   - if the field has two or more comma separated components, the result
#     is the first letdig (\w) character of each of the first two;
#   - otherwise it is the first two letdig characters of the field.
# A character that cannot be found is replaced by a default ('a' for the
# first, '0' for the second).  Note that in the single component case both
# characters come from one match, so a field containing only one letdig
# character yields both defaults.
sub process_field {
    my ($self, $field) = @_;

    if (!defined($field) || $field !~ /\S/) {
        return "";
    }

    my ($first, $second);
    my @components = split /,/, $field;
    if (scalar(@components) < 2) {
        # pick the first two letdig chars
        ($first, $second) = $field =~ /^[^\w]*(\w)[^\w]*?(\w)/i;
    } else {
        # pick the first letdig from the first two field names
        ($first) = $components[0] =~ /^[^\w]*(\w)/;
        ($second) = $components[1] =~ /^[^\w]*(\w)/;
    }

    # there may not have been any letdigs...
    $first = 'a' if !defined $first;
    $second = '0' if !defined $second;

    return $first . $second;

}
587
# Advances the version suffix of a name in place.
#   $nameref - reference to the name string, which is modified
# If the name ends in two digits they are incremented as a string, so a
# leading zero is kept ("09" becomes "10") and "99" becomes "100".  If it
# ends in a single digit that digit is incremented ("9" becomes "10").
# Otherwise the last character of the name is replaced by "0".
sub get_next_version {
    my ($self, $nameref) = @_;

    if ($$nameref =~ /(\d\d)$/) {
        my $version = $1;
        $version++;    # string increment, keeps leading zeros
        $$nameref =~ s/\d\d$/$version/;
    }
    elsif ($$nameref =~ /(\d)$/) {
        my $version = $1 + 1;
        $$nameref =~ s/\d$/$version/;
    }
    else {
        $$nameref =~ s/.$/0/;
    }
}
603
604
605
# Adds the metadata sets used in the collection to a collection info record.
#   $collection_infodb - hash reference that receives the entries
# The data comes from the buildproc's 'mdprefix_fields' hash.  For every
# prefix found there:
#   "metadataset"                    gets the prefix appended,
#   "metadatalist-<prefix>"          gets each field name appended,
#   "metadatafreq-<prefix>-<field>"  gets the value stored for that field.
sub get_collection_meta_sets {
    my ($self, $collection_infodb) = @_;

    my $mdprefix_fields = $self->{'buildproc'}->{'mdprefix_fields'};
    foreach my $prefix (keys %$mdprefix_fields) {
        push(@{$collection_infodb->{"metadataset"}}, $prefix);

        my $list_key = "metadatalist-$prefix";
        foreach my $field (keys %{$mdprefix_fields->{$prefix}}) {
            push(@{$collection_infodb->{$list_key}}, $field);

            my $freq = $mdprefix_fields->{$prefix}->{$field};
            push(@{$collection_infodb->{"metadatafreq-$prefix-$field"}}, $freq);
        }
    }
}
625
626
# Writes the "collection" entry to the info database.
#   $infodb_handle - the open info database write handle
# The default is to output only the metadata sets (prefixes) used in the
# collection, as gathered by get_collection_meta_sets().
sub output_collection_meta {
    my ($self, $infodb_handle) = @_;

    my $collection_infodb = {};
    $self->get_collection_meta_sets($collection_infodb);
    &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, "collection", $collection_infodb);
}
637
# Reads an existing build configuration file.  This is needed, for example,
# when each stage of building is done separately, or for incremental
# building.
#
# The file is build.cfg in gs2 mode and buildConfig.xml otherwise.  It is
# looked for in the build directory first and then in the "index" directory
# under $ENV{'GSDLCOLLECTDIR'}.  Returns undef when neither exists,
# otherwise whatever the matching reader returns.
sub read_build_cfg {
    my $self = shift(@_);

    my $is_gs2 = ($gs_mode eq "gs2");
    my $buildconfigfilename = $is_gs2 ? "build.cfg" : "buildConfig.xml";

    my $buildconfigfile = &util::filename_cat($self->{'build_dir'}, $buildconfigfilename);

    unless (-e $buildconfigfile) {
        # try the index dir - but do we know where it is?? try here
        $buildconfigfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "index", $buildconfigfilename);

        # we cant find a config file - just ignore the field list
        return undef unless -e $buildconfigfile;
    }

    return &colcfg::read_build_cfg($buildconfigfile) if $is_gs2;

    print STDERR "read build cfg basebuilder***\n";
    return &buildConfigxml::read_cfg_file($buildconfigfile);

}
670
# Prints statistics for the pass the buildproc has just completed to the
# builder's output handle: whether an index was created or text was
# compressed, the total bytes in the collection and the bytes processed.
#
# When fewer than 50 bytes were processed (and text was expected), a note
# is added: for a keepold build only if nothing at all was processed,
# otherwise a prominent warning asking whether this was intended.
sub print_stats {
    my ($self) = @_;

    my $outhandle = $self->{'outhandle'};
    my $buildproc = $self->{'buildproc'};
    my $indexing_text = $buildproc->get_indexing_text();
    my $index = $buildproc->get_index();
    my $num_bytes = $buildproc->get_num_bytes();
    my $num_processed_bytes = $buildproc->get_num_processed_bytes();

    if (!$indexing_text) {
        print $outhandle "Stats (Compressing text from $index)\n";
    } else {
        print $outhandle "Stats (Creating index $index)\n";
    }
    print $outhandle "Total bytes in collection: $num_bytes\n";
    print $outhandle "Total bytes in $index: $num_processed_bytes\n";

    if ($num_processed_bytes < 50 && ($indexing_text || !$self->{'no_text'})) {

        if (!$self->{'keepold'}) {
            print $outhandle "***************\n";
            if ($indexing_text) {
                print $outhandle "WARNING: There is very little or no text to process for $index\n";
            } elsif (!$self->{'no_text'}) {
                print $outhandle "WARNING: There is very little or no text to compress\n";
            }
            print $outhandle " Was this your intention?\n";
            print $outhandle "***************\n";
        }
        elsif ($num_processed_bytes == 0) {
            if ($indexing_text) {
                print $outhandle "No additional text was added to $index\n";
            } elsif (!$self->{'no_text'}) {
                print $outhandle "No additional text was compressed\n";
            }
        }

    }

}
713
714
7151;
716
Note: See TracBrowser for help on using the repository browser.