source: gsdl/trunk/perllib/basebuilder.pm@ 17740

Last change on this file since 17740 was 17573, checked in by kjdon, 16 years ago

moved a couple of things around, added read_build_cfg which finds and reads in build.cfg file, added init_for_incremental_build

  • Property svn:keywords set to Author Date Id Revision
File size: 22.1 KB
Line 
1###########################################################################
2#
3# basebuilder.pm -- base class for collection builders
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package basebuilder;
27
28use strict;
29no strict 'refs'; # allow filehandles to be variables and viceversa
30
31use classify;
32use cfgread;
33use colcfg;
34use dbutil;
35use plugin;
36use util;
37
38
BEGIN {
    # Switch autoflush on for both standard streams so that output from
    # mgpp does not get out of sync with output from the plugins.
    foreach my $stream (\*STDOUT, \*STDERR) {
        $stream->autoflush(1);
    }
}

END {
    # Switch autoflush back off for both standard streams on exit.
    foreach my $stream (\*STDOUT, \*STDERR) {
        $stream->autoflush(0);
    }
}

our $maxdocsize = 12000;

# Which flavour of Greenstone we are building for: "gs2" (the default)
# or "gs3".  Set by new() from the collection configuration file found.
my $gs_mode = "gs2";
55
# Constructor.
#
# Positional arguments (after the class name): collection, source_dir,
# build_dir, verbosity, maxdocs, debug, keepold, incremental,
# remove_empty_classifications, outhandle, no_text, failhandle, gli,
# disable_OAI.  outhandle and failhandle default to STDERR; no_text, gli
# and disable_OAI default to 0.
#
# Reads the collection configuration, loads the plugins and classifiers
# it lists, and records the "dontgdbm" fields.  Dies if no plugins could
# be loaded.  Returns the blessed builder object.
sub new {
    my $class = shift(@_);
    my ($collection, $source_dir, $build_dir, $verbosity,
        $maxdocs, $debug, $keepold, $incremental,
        $remove_empty_classifications,
        $outhandle, $no_text, $failhandle, $gli, $disable_OAI) = @_;

    # fill in defaults for the optional arguments
    $outhandle  = *STDERR unless defined $outhandle;
    $no_text    = 0       unless defined $no_text;
    $failhandle = *STDERR unless defined $failhandle;
    $gli        = 0       unless defined $gli;

    # disable_OAI applies to greenstone 3 only and is only passed to
    # &colcfg::write_build_cfg_xml (then cfgread4gs3::write_cfg_file)
    # when writing the buildConfig.xml
    $disable_OAI = 0 unless defined $disable_OAI;

    # create the builder object
    my $self = {};
    bless $self, $class;
    $self->{'collection'}  = $collection;
    $self->{'source_dir'}  = $source_dir;
    $self->{'build_dir'}   = $build_dir;
    $self->{'verbosity'}   = $verbosity;
    $self->{'maxdocs'}     = $maxdocs;
    $self->{'debug'}       = $debug;
    $self->{'keepold'}     = $keepold;
    $self->{'incremental'} = $incremental;
    $self->{'remove_empty_classifications'} = $remove_empty_classifications;
    $self->{'outhandle'}   = $outhandle;
    $self->{'no_text'}     = $no_text;
    $self->{'failhandle'}  = $failhandle;
    $self->{'notbuilt'}    = {};    # indexes not built
    $self->{'gli'}         = $gli;
    $self->{'disable_OAI'} = $disable_OAI;

    # Locate and read the collection configuration file; this also tells
    # us whether we are in gs2 or gs3 mode.
    my ($colcfgname, $detected_mode) = &colcfg::get_collect_cfg_name($outhandle);
    $gs_mode = $detected_mode;
    if ($gs_mode eq "gs2") {
        $self->{'collect_cfg'} = &colcfg::read_collect_cfg ($colcfgname);
    }
    elsif ($gs_mode eq "gs3") {
        $self->{'collect_cfg'} = &colcfg::read_collection_cfg_xml ($colcfgname);

        # 'collect_cfg_preserve' is used for gs3 only: it is handed to
        # &colcfg::write_build_cfg_xml in make_auxiliary_files.  A second,
        # untouched copy is kept because $self->{'collect_cfg'}->{'classify'}
        # gets modified during the call to &classify::load_classifiers.
        $self->{'collect_cfg_preserve'} = &colcfg::read_collection_cfg_xml ($colcfgname);
    }

    # database type for this collection comes from the configuration file
    # when present there, otherwise from dbutil's default
    $self->{'infodbtype'} = $self->{'collect_cfg'}->{'infodbtype'} || &dbutil::get_default_infodb_type();

    my $cfg = $self->{'collect_cfg'};

    # the list of plugins for this collection
    my $plugins = defined $cfg->{'plugin'} ? $cfg->{'plugin'} : [];

    # extra global options handed to every plugin
    my @global_opts = ();
    my $separate_cjk = $cfg->{'separate_cjk'};
    if (defined $separate_cjk && $separate_cjk =~ /^true$/i) {
        push @global_opts, "-separate_cjk";
    }

    # load all the plugins
    $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity, $outhandle, $failhandle, \@global_opts, $keepold);

    unless (@{$self->{'pluginfo'}}) {
        print $outhandle "No plugins were loaded.\n";
        die "\n";
    }

    # the list of classifiers for this collection
    my $classifiers = defined $cfg->{'classify'} ? $cfg->{'classify'} : [];

    # load all the classifiers
    $self->{'classifiers'} = &classify::load_classifiers ($classifiers, $build_dir, $outhandle);

    # remember which fields must not go into the database
    $self->{'dontdb'} = {};
    if (defined $cfg->{'dontgdbm'}) {
        $self->{'dontdb'}->{$_} = 1 foreach @{$cfg->{'dontgdbm'}};
    }

    $self->{'maxnumeric'} = 4;
    return $self;
}
144
# Second-stage initialisation.  This work was moved out of new() so that
# subclass methods (generate_index_list, default_buildproc,
# generate_index_options, init_for_incremental_build) can be used.
#
# Steps, in order:
#   1. let the subclass generate the index list;
#   2. expand the indexes by subcollection and by language, then drop
#      duplicate entries (first occurrence wins);
#   3. locate, load and instantiate the build processor class;
#   4. work out the index options;
#   5. unless debugging or keeping the old build, wipe and recreate the
#      build directory and its "text" subdirectory;
#   6. run the incremental-build hook when building incrementally.
#
# Dies if the build processor module cannot be loaded or constructed.
sub init {
    my $self = shift(@_);

    $self->generate_index_list();

    my $cfg = ($self->{'collect_cfg'} ||= {});

    # sort out subcollection indexes
    if (defined $cfg->{'indexsubcollections'}) {
        my $indexes = $cfg->{'indexes'};
        $cfg->{'indexes'} = [];
        foreach my $subcollection (@{$cfg->{'indexsubcollections'}}) {
            foreach my $index (@$indexes) {
                push (@{$cfg->{'indexes'}}, "$index:$subcollection");
            }
        }
    }

    # sort out language subindexes
    if (defined $cfg->{'languages'}) {
        my $indexes = $cfg->{'indexes'};
        $cfg->{'indexes'} = [];
        foreach my $language (@{$cfg->{'languages'}}) {
            foreach my $index (@$indexes) {
                if (defined ($cfg->{'indexsubcollections'})) {
                    push (@{$cfg->{'indexes'}}, "$index:$language");
                }
                else { # add in an empty subcollection field
                    push (@{$cfg->{'indexes'}}, "$index\:\:$language");
                }
            }
        }
    }

    if (defined($cfg->{'indexes'})) {
        # make sure that the same index isn't specified more than once,
        # keeping the first occurrence of each
        my %seen = ();
        $cfg->{'indexes'} = [ grep { !$seen{$_}++ } @{$cfg->{'indexes'}} ];
    } else {
        $cfg->{'indexes'} = [];
    }

    # load up the document processor for building
    # if a buildproc class has been created for this collection, use it
    # otherwise, use the default buildproc named by the subclass
    my ($buildprocdir, $buildproctype);
    my $collection = $self->{'collection'};
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildproctype = "custombuildproc";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "custombuildproc";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}buildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "${collection}buildproc";
    } else {
        $buildprocdir = "$ENV{'GSDLHOME'}/perllib";
        $buildproctype = $self->default_buildproc();
    }
    require "$buildprocdir/$buildproctype.pm";

    # Construct the build processor with an ordinary class-method call.
    # The class name is only known at run time, but that does not need a
    # string eval: "new $buildproctype(...)" is ambiguous in a package
    # that defines its own new(), and it pastes a collection-derived name
    # into evaluated code.  The block eval keeps the original behaviour
    # of re-raising any constructor failure as a plain string.
    $self->{'buildproc'} = eval {
        $buildproctype->new($self->{'collection'},
                            $self->{'source_dir'},
                            $self->{'build_dir'},
                            $self->{'keepold'},
                            $self->{'verbosity'},
                            $self->{'outhandle'});
    };
    die "$@" if $@;

    $self->generate_index_options();

    if (!$self->{'debug'} && !$self->{'keepold'}) {
        # remove any old builds
        &util::rm_r($self->{'build_dir'});
        &util::mk_all_dir($self->{'build_dir'});

        # make the text directory
        my $textdir = "$self->{'build_dir'}/text";
        &util::mk_all_dir($textdir);
    }

    if ($self->{'incremental'}) {
        # some classes may need to do some additional initialisation
        $self->init_for_incremental_build();
    }

}
236
# Hook called by init() when an incremental build was requested.
# Does nothing here; override it in a subclass that needs additional
# initialisation for an incremental build.
sub init_for_incremental_build {
    my $self = $_[0];
}
242
# Tear-down counterpart of init(): hands the loaded plugins and the
# build processor to plugin::deinit.
sub deinit {
    my ($self) = @_;

    &plugin::deinit($self->{'pluginfo'}, $self->{'buildproc'});
}
248
# Works out the index options from the collection configuration.
# separate_cjk is switched on when any entry of 'indexoptions' contains
# the text "separate_cjk".  The result is passed to the build processor
# and also stored on the builder so it can be written to build.cfg.
sub generate_index_options {
    my ($self) = @_;

    my $options = $self->{'collect_cfg'}->{'indexoptions'};

    my $separate_cjk = 0;
    if (defined($options)) {
        $separate_cjk = 1 if grep { $_ =~ /separate_cjk/ } @$options;
    }

    # set this for building
    $self->{'buildproc'}->set_separate_cjk($separate_cjk);
    # record it for build.cfg
    $self->{'separate_cjk'} = $separate_cjk;
}
266
# Passes the sections_index_document_metadata setting straight through
# to the build processor.
sub set_sections_index_document_metadata {
    my ($self, $index) = @_;

    $self->{'buildproc'}->set_sections_index_document_metadata($index);
}
273
# Stores the 'maxnumeric' setting on the builder; make_auxiliary_files
# later copies it into the build configuration.
sub set_maxnumeric {
    my ($self, $maxnumeric) = @_;

    $self->{'maxnumeric'} = $maxnumeric;
}
# Records the strip_html flag on the builder and forwards the same
# value to the build processor.
sub set_strip_html {
    my ($self, $strip) = @_;

    $self->{'strip_html'} = $strip;
    $self->{'buildproc'}->set_strip_html($strip);
}
287
# Placeholder for text compression; concrete builders must override it.
# Takes the builder and a text index argument (unused here).  This base
# version only reports on STDERR that the subclass implementation is
# missing and returns nothing.
sub compress_text {
    my $self = shift (@_);

    # The message ends with a newline, like the other "should be
    # implemented in subclass" stubs in this file, so that it does not
    # run into whatever is printed next.
    print STDERR "compress_text() should be implemented in subclass!!\n";
    return;
}
295
296
# Builds the indexes for the collection.
#
# When $indexname contains at least one word character only that index
# is considered; otherwise every index listed in the collection
# configuration is.  The index mapping is created first, then each index
# that want_built() accepts is built via build_index(); the others are
# reported as ignored.  build_indexes_extra() runs once at the end.
sub build_indexes {
    my ($self, $indexname) = @_;
    my $outhandle = $self->{'outhandle'};

    my $indexes;
    if (defined $indexname && $indexname =~ /\w/) {
        $indexes = [ $indexname ];
    }
    else {
        $indexes = $self->{'collect_cfg'}->{'indexes'};
    }

    # create the mapping between the index descriptions
    # and their directory names (includes subcolls and langs)
    $self->{'index_mapping'} = $self->create_index_mapping ($indexes);

    # build each of the indexes
    foreach my $index (@$indexes) {
        unless ($self->want_built($index)) {
            print $outhandle "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);
            next;
        }

        if ($self->{'verbosity'} >= 1) {
            print $outhandle "\n*** building index $index in subdirectory " .
                "$self->{'index_mapping'}->{$index}\n";
        }
        print STDERR "<Stage name='Index' source='$index'>\n" if $self->{'gli'};
        $self->build_index($index);
    }

    $self->build_indexes_extra();

}
328
# Hook called by build_indexes() after all the indexes have been
# handled.  Does nothing here; override it in a subclass that has extra
# work to do at the end of index building.
sub build_indexes_extra {
    my $self = $_[0];

}
335
# Placeholder for building a single index; concrete builders must
# override it.  This base version only reports on STDERR that the
# subclass implementation is missing and returns nothing.
sub build_index {
    my ($self, $index) = @_;

    print STDERR "build_index should be implemented in subclass\n";
    return;
}
343
344
345
# Creates the collection's info database and processes associated files.
#
# Steps, in order: make sure the "text" and "assoc" directories exist
# under the build directory; initialise the classifiers; when keeping
# the old build, reconstruct the existing documents' metadata from the
# database; open the database for writing (STDOUT is used instead in
# debug mode); put the build processor into 'infodb' mode and reset it;
# feed the reconstructed documents and then the source directory through
# it; finally write the collection metadata, the classification
# information and the "browselist" entry, and close the database.
#
# Dies if the database write handle cannot be opened.
sub make_infodatabase {
    my ($self) = @_;
    my $outhandle = $self->{'outhandle'};
    my $infodbtype = $self->{'infodbtype'};
    my $buildproc = $self->{'buildproc'};

    print STDERR "BuildDir: $self->{'build_dir'}\n";

    my $textdir = &util::filename_cat($self->{'build_dir'}, "text");
    my $assocdir = &util::filename_cat($self->{'build_dir'}, "assoc");
    &util::mk_all_dir ($textdir);
    &util::mk_all_dir ($assocdir);

    # where the info database lives
    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $self->{'collection'}, $textdir);

    if ($self->{'verbosity'} >= 1) {
        print $outhandle "\n*** creating the info database and processing associated files\n";
    }
    print STDERR "<Stage name='CreateInfoData'>\n" if $self->{'gli'};

    # init all the classifiers
    &classify::init_classifiers ($self->{'classifiers'});

    # when keeping the old build, reconstruct doc_obj metadata from the
    # database for all docs
    my $reconstructed_docs = undef;
    if ($self->{'keepold'}) {
        $reconstructed_docs = &classify::reconstruct_doc_objs_metadata($infodbtype, $infodb_file_path);
    }

    # choose where the database entries are written to
    my $infodb_handle;
    if ($self->{'debug'}) {
        $infodb_handle = *STDOUT;
    }
    else {
        $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $infodb_file_path);
        unless (defined($infodb_handle)) {
            print STDERR "<FatalError name='NoRunText2DB'/>\n</Stage>\n" if $self->{'gli'};
            die "builder::make_infodatabase - couldn't open infodb write handle\n";
        }
    }

    # set up the document processor
    $buildproc->set_infodbtype ($infodbtype);
    $buildproc->set_output_handle ($infodb_handle);
    $buildproc->set_mode ('infodb');
    $buildproc->set_assocdir ($assocdir);
    $buildproc->set_dontdb ($self->{'dontdb'});
    $buildproc->set_classifiers ($self->{'classifiers'});
    $buildproc->set_indexing_text (0);
    $buildproc->set_store_text(1);
    $buildproc->set_store_metadata_coverage ($self->{'collect_cfg'}->{'store_metadata_coverage'});

    # make_infodatabase needs full reset even for incremental build
    # as incremental works by reconstructing all docs from the database and
    # then adding in the new ones
    $buildproc->zero_reset();

    $buildproc->{'mdprefix_fields'} = {};

    if ($self->{'keepold'}) {
        # create flat classify structure, ready for new docs to be added
        foreach my $doc_obj ( @$reconstructed_docs ) {
            print $outhandle " Adding reconstructed ", $doc_obj->get_OID(), " into classify structures\n";
            $buildproc->process($doc_obj,undef);
        }
    }

    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $buildproc, $self->{'maxdocs'},0, $self->{'gli'});

    # this has changed to only output collection meta if its
    # not in the config file
    $self->output_collection_meta($infodb_handle);

    # output classification information
    &classify::output_classify_info ($self->{'classifiers'}, $infodbtype, $infodb_handle,
                                     $self->{'remove_empty_classifications'},
                                     $self->{'gli'});

    # Output classifier reverse lookup, used in incremental deletion
    #&classify::print_reverse_lookup($infodb_handle);

    # output doclist
    my @doc_list = $buildproc->get_doc_list();
    my $browselist_infodb = {
        'hastxt'      => [ "0" ],
        'childtype'   => [ "VList" ],
        'numleafdocs' => [ scalar(@doc_list) ],
        'thistype'    => [ "Invisible" ],
        'contains'    => [ join(";", @doc_list) ],
    };
    &dbutil::write_infodb_entry($infodbtype, $infodb_handle, "browselist", $browselist_infodb);

    # STDOUT (debug mode) is left open
    unless ($self->{'debug'}) {
        &dbutil::close_infodb_write_handle($infodbtype, $infodb_handle);
    }

    print STDERR "</Stage>\n" if $self->{'gli'};
}
441
# Writes the build configuration file for the collection.
#
# Starts from any 'build_cfg' a subclass has already stored on the
# builder, adds the build date, build type, index stem, document
# statistics, the index / subcollection / language maps, the list of
# indexes that were not built, maxnumeric and the database type, lets
# build_cfg_extra() add more, and then writes build.cfg (gs2 mode) or
# buildConfig.xml (gs3 mode) into the build directory.
sub make_auxiliary_files {
    my ($self) = @_;

    # subclasses may have already defined stuff in here
    my $build_cfg = defined $self->{'build_cfg'} ? $self->{'build_cfg'} : {};

    my $outhandle = $self->{'outhandle'};

    print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Stage name='CreatingAuxilary'>\n" if $self->{'gli'};

    # make sure the build directory exists
    &util::mk_all_dir ($self->{'build_dir'});

    # store the build date and the general build settings
    $build_cfg->{'builddate'} = time;
    $build_cfg->{'buildtype'} = $self->{'buildtype'};
    $build_cfg->{'indexstem'} = &util::get_dirsep_tail($self->{'collection'});
    $build_cfg->{'stemindexes'} = $self->{'stemindexes'};
    $build_cfg->{'separate_cjk'} = "true" if $self->{'separate_cjk'};

    # store the number of documents and number of bytes
    $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
    $build_cfg->{'numsections'} = $self->{'buildproc'}->get_num_sections();
    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();

    # store the mapping between the index names and the directory names
    # the index map is used to determine what indexes there are, so any
    # that are not built should not be put into the map.
    my @indexmap = ();
    foreach my $index (@{$self->{'index_mapping'}->{'indexmaporder'}}) {
        next if defined ($self->{'notbuilt'}->{$index});
        push (@indexmap, $index . "->" . $self->{'index_mapping'}->{'indexmap'}->{$index});
    }
    $build_cfg->{'indexmap'} = \@indexmap if scalar (@indexmap);

    my @subcollectionmap = ();
    foreach my $subcollection (@{$self->{'index_mapping'}->{'subcollectionmaporder'}}) {
        push (@subcollectionmap,
              $subcollection . "->" . $self->{'index_mapping'}->{'subcollectionmap'}->{$subcollection});
    }
    $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);

    my @languagemap = ();
    foreach my $language (@{$self->{'index_mapping'}->{'languagemaporder'}}) {
        push (@languagemap,
              $language . "->" . $self->{'index_mapping'}->{'languagemap'}->{$language});
    }
    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);

    # indexes that want_built() rejected
    my @notbuilt = keys %{$self->{'notbuilt'}};
    $build_cfg->{'notbuilt'} = \@notbuilt if scalar (@notbuilt);

    $build_cfg->{'maxnumeric'} = $self->{'maxnumeric'};

    $build_cfg->{'infodbtype'} = $self->{'infodbtype'};

    $self->build_cfg_extra($build_cfg);

    if ($gs_mode eq "gs2") {
        &colcfg::write_build_cfg("$self->{'build_dir'}/build.cfg", $build_cfg);
    }
    elsif ($gs_mode eq "gs3") {
        &colcfg::write_build_cfg_xml("$self->{'build_dir'}/buildConfig.xml", $build_cfg, $self->{'collect_cfg_preserve'}, $self->{'disable_OAI'});
    }

    print STDERR "</Stage>\n" if $self->{'gli'};
}
519
# Hook called by make_auxiliary_files() just before the build
# configuration is written.  Receives the build configuration hash
# reference.  Does nothing here; override it in a subclass that wants to
# add extra entries to build.cfg.
sub build_cfg_extra {
    my $self = shift;
    my ($build_cfg) = @_;

}
526
527
# Placeholder for collection-specific processing; does nothing in the
# base class.
sub collect_specific {
    my $self = $_[0];
}
531
# Decides whether an index should be built.
#
# Each entry of the 'dontbuild' configuration list is used as a pattern
# anchored at both ends.  On the first entry that matches $index the
# index is recorded in $self->{'notbuilt'} and 0 is returned.  Returns 1
# when nothing matches or when there is no 'dontbuild' list.
sub want_built {
    my ($self, $index) = @_;

    my $dontbuild = $self->{'collect_cfg'}->{'dontbuild'};
    return 1 unless defined ($dontbuild);

    foreach my $checkstr (@$dontbuild) {
        next unless $index =~ /^$checkstr$/;

        $self->{'notbuilt'}->{$index} = 1;
        return 0;
    }

    return 1;
}
547
# Placeholder for creating the mapping between index descriptions and
# their directory names; concrete builders must override it.  This base
# version reports on STDERR that the subclass implementation is missing
# and returns a reference to an empty hash.
sub create_index_mapping {
    my ($self, $indexes) = @_;

    print STDERR "create_index_mapping should be implemented in subclass\n";

    my $empty_mapping = {};
    return $empty_mapping;
}
556
# Returns a two-character processed version of a field.
#
# If the field has two or more comma-separated components, the result is
# the first letdig (\w) character of each of the first two components.
# Otherwise it is the first two letdig characters of the field; when the
# field holds fewer than two letdigs that match fails as a whole and
# neither character is taken.  A character that could not be found is
# replaced by 'a' (first position) or '0' (second position).
# An undefined or all-whitespace field gives the empty string.
sub process_field {
    my ($self, $field) = @_;

    return "" unless (defined ($field) && $field =~ /\S/);

    my ($first, $second);
    my @components = split (/,/, $field);
    if (scalar (@components) < 2) {
        # single component: pick its first two letdig chars
        ($first, $second) = $field =~ /^\W*(\w)\W*?(\w)/i;
    }
    else {
        # pick the first letdig from each of the first two components
        ($first)  = $components[0] =~ /^\W*(\w)/;
        ($second) = $components[1] =~ /^\W*(\w)/;
    }

    # there may not have been any letdigs...
    $first  = 'a' unless defined $first;
    $second = '0' unless defined $second;

    return $first . $second;

}
586
# Advances the version suffix of the name referenced by $nameref,
# modifying the name in place.
#
# If the name ends in digits, its last two digits (or its single final
# digit when there is only one) are replaced by their incremented value,
# e.g. "x0" -> "x1", "x9" -> "x10", "x07" -> "x08".  Otherwise the last
# character of the name is replaced by "0".
sub get_next_version {
    my ($self, $nameref) = @_;

    if ($$nameref =~ /(\d{1,2})$/) {
        # incrementing the captured string itself keeps a leading zero
        my $next = $1;
        $next++;
        $$nameref =~ s/\d{1,2}$/$next/;
    }
    else {
        $$nameref =~ s/.$/0/;
    }
}
602
603
604
# Copies the metadata-set information gathered by the build processor
# (its 'mdprefix_fields' hash) into the given collection infodb hash.
# For every prefix it appends:
#   "metadataset"                     - the prefix itself
#   "metadatalist-<prefix>"           - each field seen under that prefix
#   "metadatafreq-<prefix>-<field>"   - the value stored for that field
sub get_collection_meta_sets
{
    my ($self, $collection_infodb) = @_;

    my $sets = $self->{'buildproc'}->{'mdprefix_fields'};
    for my $prefix (keys %$sets)
    {
        push(@{$collection_infodb->{"metadataset"}}, $prefix);

        for my $field (keys %{$sets->{$prefix}})
        {
            push(@{$collection_infodb->{"metadatalist-$prefix"}}, $field);
            push(@{$collection_infodb->{"metadatafreq-$prefix-$field"}},
                 $sets->{$prefix}->{$field});
        }
    }
}
624
625
# Writes the "collection" entry to the info database.  By default the
# entry holds only the metadata sets (prefixes) used in the collection,
# as gathered by get_collection_meta_sets().
sub output_collection_meta
{
    my ($self, $infodb_handle) = @_;

    my $collection_infodb = {};
    $self->get_collection_meta_sets($collection_infodb);
    &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, "collection", $collection_infodb);
}
636
# Finds and reads an existing build.cfg.  This is needed, for example,
# when each stage of building is run separately or when building
# incrementally.
#
# The build directory is tried first, then the "index" directory under
# $ENV{'GSDLCOLLECTDIR'}.  Returns what colcfg::read_build_cfg gives for
# the first file found, or undef when neither file exists.
sub read_build_cfg {
    my ($self) = @_;

    my @locations = (
        [ $self->{'build_dir'}, "build.cfg" ],
        # the index dir - but do we know where it is?? try here
        [ $ENV{'GSDLCOLLECTDIR'}, "index", "build.cfg" ],
    );

    foreach my $parts (@locations) {
        my $buildconfigfile = &util::filename_cat(@$parts);
        next unless -e $buildconfigfile;

        my $buildcfg = &colcfg::read_build_cfg( $buildconfigfile);
        return $buildcfg;
    }

    # we cant find a config file - just ignore the field list
    return undef;

}
658
# Prints statistics for the current build-processor pass to the
# builder's output handle: a heading naming the index, the total bytes
# in the collection and the bytes processed in this pass.
#
# When fewer than 50 bytes were processed, and the pass was either
# indexing text or compressing text that has not been switched off with
# no_text, a further note is printed: for a keepold build with exactly 0
# bytes, that nothing was added or compressed; for a fresh build, a
# warning that there is very little or no text.
sub print_stats {
    my ($self) = @_;

    my $outhandle = $self->{'outhandle'};
    my $buildproc = $self->{'buildproc'};
    my $indexing_text = $buildproc->get_indexing_text();
    my $index = $buildproc->get_index();
    my $num_bytes = $buildproc->get_num_bytes();
    my $num_processed_bytes = $buildproc->get_num_processed_bytes();

    my $heading = $indexing_text
        ? "Stats (Creating index $index)\n"
        : "Stats (Compressing text from $index)\n";
    print $outhandle $heading;
    print $outhandle "Total bytes in collection: $num_bytes\n";
    print $outhandle "Total bytes in $index: $num_processed_bytes\n";

    # nothing more to say when enough text was processed, or when text
    # compression was deliberately switched off
    return unless $num_processed_bytes < 50;
    return unless $indexing_text || !$self->{'no_text'};

    # from here on, a pass that is not indexing is a compression pass
    # with no_text unset, so no further no_text check is needed

    if ($self->{'keepold'}) {
        return unless $num_processed_bytes == 0;

        my $note = $indexing_text
            ? "No additional text was added to $index\n"
            : "No additional text was compressed\n";
        print $outhandle $note;
        return;
    }

    my $warning = $indexing_text
        ? "WARNING: There is very little or no text to process for $index\n"
        : "WARNING: There is very little or no text to compress\n";
    print $outhandle "***************\n";
    print $outhandle $warning;
    print $outhandle " Was this your intention?\n";
    print $outhandle "***************\n";

}
701
702
7031;
704
Note: See TracBrowser for help on using the repository browser.