root/gsdl/trunk/perllib/basebuilder.pm @ 20100

Revision 20100, 22.2 KB (checked in by kjdon, 11 years ago)

small changes to do with calls to colcfg methods

  • Property svn:keywords set to Author Date Id Revision
Line 
###########################################################################
#
# basebuilder.pm -- base class for collection builders
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
25
26package basebuilder;
27
28use strict;
29no strict 'refs'; # allow filehandles to be variables and viceversa
30
31use classify;
32use cfgread;
33use colcfg;
34use dbutil;
35use plugin;
36use util;
37
38
BEGIN {
    # Method calls on the bareword handles (STDOUT->autoflush) only work
    # once IO::Handle is loaded.  Perls before 5.14 do not load it on
    # demand, so require it here rather than relying on some other module
    # having pulled it in.
    require IO::Handle;

    # set autoflush on for STDERR and STDOUT so that mgpp
    # doesn't get out of sync with plugins
    STDOUT->autoflush(1);
    STDERR->autoflush(1);
}

END {
    # switch autoflush back off when the program exits
    STDOUT->autoflush(0);
    STDERR->autoflush(0);
}

# NOTE(review): not referenced anywhere else in this file -- presumably
# read by subclasses or build procs; confirm before changing.
our $maxdocsize = 12000;

# used to signify "gs2"(default) or "gs3"; new() overwrites it with the
# mode reported by colcfg::get_collect_cfg_name
our $gs_mode = "gs2";
55
# Constructor for a collection builder.
#
# Positional arguments: collection name, source directory, build directory,
# verbosity, maxdocs, debug, keepold, incremental,
# remove_empty_classifications, outhandle, no_text, failhandle, gli,
# disable_OAI.  outhandle and failhandle default to STDERR; no_text, gli
# and disable_OAI default to 0.
#
# Reads the collection configuration file, then loads the plugins and
# classifiers it lists.  Dies if no plugins were loaded.  As a side effect
# the package variable $gs_mode is set to the mode reported by
# colcfg::get_collect_cfg_name.
sub new {
    my $class = shift(@_);
    my ($collection, $source_dir, $build_dir, $verbosity,
        $maxdocs, $debug, $keepold, $incremental,
        $remove_empty_classifications,
        $outhandle, $no_text, $failhandle, $gli, $disable_OAI) = @_;

    # defaults for the optional arguments
    $outhandle  = *STDERR unless defined $outhandle;
    $failhandle = *STDERR unless defined $failhandle;
    $no_text    = 0 unless defined $no_text;
    $gli        = 0 unless defined $gli;
    # disable_OAI applies to greenstone 3 only and is only passed to
    # &colcfg::write_build_cfg_xml (then buildConfigxml::write_build_cfg_file)
    # when writing the buildConfig.xml
    $disable_OAI = 0 unless defined $disable_OAI;

    # create a builder object
    my %fields = (
        'collection'                   => $collection,
        'source_dir'                   => $source_dir,
        'build_dir'                    => $build_dir,
        'verbosity'                    => $verbosity,
        'maxdocs'                      => $maxdocs,
        'debug'                        => $debug,
        'keepold'                      => $keepold,
        'incremental'                  => $incremental,
        'remove_empty_classifications' => $remove_empty_classifications,
        'outhandle'                    => $outhandle,
        'no_text'                      => $no_text,
        'failhandle'                   => $failhandle,
        'notbuilt'                     => {},    # indexes not built
        'gli'                          => $gli,
        'disable_OAI'                  => $disable_OAI,
    );
    my $self = bless \%fields, $class;

    # Read in the collection configuration file.
    my ($colcfgname, $mode) = &colcfg::get_collect_cfg_name($outhandle);
    $gs_mode = $mode;
    $self->{'collect_cfg'} = &colcfg::read_collection_cfg ($colcfgname, $gs_mode);

    if ($gs_mode eq "gs3") {
        # Read it a second time so that an untouched copy is available when
        # buildConfig.xml is written: $self->{'collect_cfg'}->{'classify'}
        # gets modified while &classify::load_classifiers runs.
        $self->{'collect_cfg_preserve'} = &colcfg::read_collection_cfg ($colcfgname, $gs_mode);
    }

    # database type comes from collect.cfg when given there, otherwise the
    # dbutil default is used
    $self->{'infodbtype'} = $self->{'collect_cfg'}->{'infodbtype'} || &dbutil::get_default_infodb_type();

    # the list of plugins for this collection (empty when none configured)
    my $plugins = defined $self->{'collect_cfg'}->{'plugin'}
        ? $self->{'collect_cfg'}->{'plugin'}
        : [];

    # extra global options handed to every plugin
    my @global_opts = ();
    if (defined $self->{'collect_cfg'}->{'separate_cjk'} && $self->{'collect_cfg'}->{'separate_cjk'} =~ /^true$/i) {
        push @global_opts, "-separate_cjk";
    }

    # load all the plugins; a collection without plugins cannot be built
    $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity, $outhandle, $failhandle, \@global_opts, $keepold);
    unless (@{$self->{'pluginfo'}}) {
        print $outhandle "No plugins were loaded.\n";
        die "\n";
    }

    # the list of classifiers for this collection (empty when none configured)
    my $classifiers = defined $self->{'collect_cfg'}->{'classify'}
        ? $self->{'collect_cfg'}->{'classify'}
        : [];

    # load all the classifiers
    $self->{'classifiers'} = &classify::load_classifiers ($classifiers, $build_dir, $outhandle);

    # remember the "dontgdbm" fields from the configuration as a lookup hash
    $self->{'dontdb'} = {};
    if (defined ($self->{'collect_cfg'}->{'dontgdbm'})) {
        $self->{'dontdb'}->{$_} = 1 foreach @{$self->{'collect_cfg'}->{'dontgdbm'}};
    }

    $self->{'maxnumeric'} = 4;
    return $self;
}
144
# Second-stage initialisation.  This work lives here rather than in new()
# so that subclass methods (generate_index_list, default_buildproc,
# generate_index_options, init_for_incremental_build) can be used.
#
# - expands the configured indexes with subcollection and language parts
#   and removes duplicates
# - locates, loads and instantiates the buildproc class
# - unless debugging or keeping the old build, empties the build directory
#   and recreates it with its text subdirectory
# - runs the incremental-build hook when an incremental build was requested
#
# Dies if the buildproc module cannot be loaded or its constructor fails.
sub init {
    my $self = shift(@_);

    $self->generate_index_list();
    my $indexes = $self->{'collect_cfg'}->{'indexes'};
    if (defined $indexes) {
        # sort out subcollection indexes
        if (defined $self->{'collect_cfg'}->{'indexsubcollections'}) {
            $self->{'collect_cfg'}->{'indexes'} = [];
            foreach my $subcollection (@{$self->{'collect_cfg'}->{'indexsubcollections'}}) {
                foreach my $index (@$indexes) {
                    push (@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$subcollection");
                }
            }
        }

        # sort out language subindexes
        if (defined $self->{'collect_cfg'}->{'languages'}) {
            $indexes = $self->{'collect_cfg'}->{'indexes'};
            $self->{'collect_cfg'}->{'indexes'} = [];
            foreach my $language (@{$self->{'collect_cfg'}->{'languages'}}) {
                foreach my $index (@$indexes) {
                    if (defined ($self->{'collect_cfg'}->{'indexsubcollections'})) {
                        push (@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$language");
                    }
                    else { # add in an empty subcollection field
                        push (@{$self->{'collect_cfg'}->{'indexes'}}, "$index\:\:$language");
                    }
                }
            }
        }
    }

    if (defined($self->{'collect_cfg'}->{'indexes'})) {
        # make sure that the same index isn't specified more than once,
        # keeping the first occurrence of each
        my %seen = ();
        my @unique = grep { !$seen{$_}++ } @{$self->{'collect_cfg'}->{'indexes'}};
        $self->{'collect_cfg'}->{'indexes'} = \@unique;
    } else {
        $self->{'collect_cfg'}->{'indexes'} = [];
    }

    # load up the document processor for building
    # if a buildproc class has been created for this collection, use it
    # otherwise, use the default buildproc of the (sub)class
    my ($buildprocdir, $buildproctype);
    my $collection = $self->{'collection'};
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildproctype = "custombuildproc";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "custombuildproc";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}buildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "${collection}buildproc";
    } else {
        $buildprocdir = "$ENV{'GSDLHOME'}/perllib";
        $buildproctype = $self->default_buildproc();
    }
    require "$buildprocdir/$buildproctype.pm";

    # Call the constructor directly on the class name.  This replaces a
    # string eval of "new $buildproctype(...)": nothing has to be compiled
    # at run time, and a failing constructor propagates its own error.
    $self->{'buildproc'} = $buildproctype->new($self->{'collection'},
                                               $self->{'source_dir'},
                                               $self->{'build_dir'},
                                               $self->{'keepold'},
                                               $self->{'verbosity'},
                                               $self->{'outhandle'});

    $self->generate_index_options();

    if (!$self->{'debug'} && !$self->{'keepold'}) {
        # remove any old builds
        &util::rm_r($self->{'build_dir'});
        &util::mk_all_dir($self->{'build_dir'});

        # make the text directory
        my $textdir = "$self->{'build_dir'}/text";
        &util::mk_all_dir($textdir);
    }

    if ($self->{'incremental'}) {
        # some classes may need to do some additional initialisation
        $self->init_for_incremental_build();
    }

}
237
# Hook run by init() when an incremental build was requested.  The base
# class has nothing extra to set up; override it in a subclass that needs
# additional initialisation.
sub init_for_incremental_build {
    my $self = shift;
}
243
# Hands the loaded plugins and the buildproc to plugin::deinit, and returns
# whatever that call returns.
sub deinit {
    my ($self) = @_;

    return &plugin::deinit($self->{'pluginfo'}, $self->{'buildproc'});
}
249
# Works out whether the "separate_cjk" index option is switched on in the
# collection configuration, tells the buildproc, and records the flag
# (0 or 1) on the builder so that it can be written to build.cfg later.
sub generate_index_options {
    my ($self) = @_;

    my $index_options = $self->{'collect_cfg'}->{'indexoptions'};

    # any option mentioning separate_cjk turns the flag on
    my $separate_cjk = 0;
    if (defined($index_options)) {
        $separate_cjk = 1 if grep { $_ =~ /separate_cjk/ } @$index_options;
    }

    # set this for building
    $self->{'buildproc'}->set_separate_cjk($separate_cjk);
    # record it for build.cfg
    $self->{'separate_cjk'} = $separate_cjk;
}
267 
# Passes the given setting straight through to the buildproc's method of
# the same name and returns its result.
sub set_sections_index_document_metadata {
    my ($self, $index) = @_;

    $self->{'buildproc'}->set_sections_index_document_metadata($index);
}
274
# Stores the given value as the builder's 'maxnumeric' setting (new()
# initialises it to 4).
sub set_maxnumeric {
    my ($self, $maxnumeric) = @_;

    $self->{'maxnumeric'} = $maxnumeric;
}
# Records the strip_html setting on the builder and forwards it to the
# buildproc; evaluates to the result of the buildproc call.
sub set_strip_html {
    my ($self, $strip) = @_;

    $self->{'strip_html'} = $strip;
    $self->{'buildproc'}->set_strip_html($strip);
}
288
# Placeholder: text compression has to be provided by a subclass.  The base
# class version only reports that on STDERR and returns nothing.
#
# $textindex is accepted for interface compatibility with subclass
# implementations; it is not used here.
sub compress_text {
    my $self = shift (@_);
    my ($textindex) = @_;

    # newline-terminated, like the other "implement in subclass" messages
    # in this file, so the text does not run into the next line of output
    print STDERR "compress_text() should be implemented in subclass!!\n";
    return;
}
296
297
# Builds the indexes of the collection.
#
# With an $indexname containing at least one word character only that index
# is considered; otherwise every index in the collection configuration is.
# The index-to-directory mapping is (re)created first, each index accepted
# by want_built() is handed to build_index(), and build_indexes_extra() is
# called at the end.
sub build_indexes {
    my ($self, $indexname) = @_;
    my $outhandle = $self->{'outhandle'};

    my $indexes = (defined $indexname && $indexname =~ /\w/)
        ? [$indexname]
        : $self->{'collect_cfg'}->{'indexes'};

    # create the mapping between the index descriptions
    # and their directory names (includes subcolls and langs)
    $self->{'index_mapping'} = $self->create_index_mapping ($indexes);

    # build each of the indexes, skipping those that are not wanted
    foreach my $index (@$indexes) {
        unless ($self->want_built($index)) {
            print $outhandle "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);
            next;
        }

        print $outhandle "\n*** building index $index in subdirectory " .
            "$self->{'index_mapping'}->{$index}\n" if ($self->{'verbosity'} >= 1);
        print STDERR "<Stage name='Index' source='$index'>\n" if $self->{'gli'};
        $self->build_index($index);
    }

    $self->build_indexes_extra();
}
329
# Hook run by build_indexes() after all indexes have been built.  Does
# nothing in the base class; override it in a subclass that has extra work
# to do at that point.
sub build_indexes_extra {
    my $self = shift;

}
336
# Placeholder: building a single index has to be provided by a subclass.
# The base class version only reports that on STDERR and returns nothing.
sub build_index {
    my ($self, $index) = @_;

    print STDERR "build_index should be implemented in subclass\n";
    return;
}
344
345
346
# Creates the collection's info database and processes associated files.
#
# Steps, in order: make sure the text and assoc directories exist below the
# build directory; initialise the classifiers; when keeping the old build,
# reconstruct the existing documents' metadata from the database; open the
# database for writing (STDOUT is used instead in debug mode); configure the
# buildproc for 'infodb' mode; feed the reconstructed documents and then the
# source directory through it; finally write the collection metadata, the
# classification information and the "browselist" entry.
#
# Dies if the info database cannot be opened for writing.
sub make_infodatabase {
    my ($self) = @_;
    my $outhandle = $self->{'outhandle'};

    print STDERR "BuildDir: $self->{'build_dir'}\n";

    my $textdir = &util::filename_cat($self->{'build_dir'}, "text");
    my $assocdir = &util::filename_cat($self->{'build_dir'}, "assoc");
    &util::mk_all_dir ($textdir);
    &util::mk_all_dir ($assocdir);

    # Get info database file path
    my $infodb_file_path = &dbutil::get_infodb_file_path($self->{'infodbtype'}, $self->{'collection'}, $textdir);

    if ($self->{'verbosity'} >= 1) {
        print $outhandle "\n*** creating the info database and processing associated files\n";
    }
    print STDERR "<Stage name='CreateInfoData'>\n" if $self->{'gli'};

    # init all the classifiers
    &classify::init_classifiers ($self->{'classifiers'});

    # when keeping the old build, reconstruct doc_obj metadata from the
    # database for all docs
    my $reconstructed_docs = undef;
    $reconstructed_docs = &classify::reconstruct_doc_objs_metadata($self->{'infodbtype'}, $infodb_file_path)
        if $self->{'keepold'};

    # choose where the database entries go: STDOUT when debugging,
    # otherwise a write handle on the real database
    my $infodb_handle;
    if (!$self->{'debug'}) {
        $infodb_handle = &dbutil::open_infodb_write_handle($self->{'infodbtype'}, $infodb_file_path);
        unless (defined($infodb_handle)) {
            print STDERR "<FatalError name='NoRunText2DB'/>\n</Stage>\n" if $self->{'gli'};
            die "builder::make_infodatabase - couldn't open infodb write handle\n";
        }
    }
    else {
        $infodb_handle = *STDOUT;
    }

    # set up the document processor
    my $buildproc = $self->{'buildproc'};
    $buildproc->set_infodbtype ($self->{'infodbtype'});
    $buildproc->set_output_handle ($infodb_handle);
    $buildproc->set_mode ('infodb');
    $buildproc->set_assocdir ($assocdir);
    $buildproc->set_dontdb ($self->{'dontdb'});
    $buildproc->set_classifiers ($self->{'classifiers'});
    $buildproc->set_indexing_text (0);
    $buildproc->set_store_text(1);
    $buildproc->set_store_metadata_coverage ($self->{'collect_cfg'}->{'store_metadata_coverage'});

    # make_infodatabase needs full reset even for incremental build
    # as incremental works by reconstructing all docs from the database and
    # then adding in the new ones
    $buildproc->zero_reset();

    $buildproc->{'mdprefix_fields'} = {};

    if ($self->{'keepold'}) {
        # create flat classify structure, ready for new docs to be added
        foreach my $doc_obj ( @$reconstructed_docs ) {
            print $outhandle "  Adding reconstructed ", $doc_obj->get_OID(), " into classify structures\n";
            $buildproc->process($doc_obj,undef);
        }
    }

    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});

    # this has changed to only output collection meta if its
    # not in the config file
    $self->output_collection_meta($infodb_handle);

    # output classification information
    &classify::output_classify_info ($self->{'classifiers'}, $self->{'infodbtype'}, $infodb_handle,
                                     $self->{'remove_empty_classifications'},
                                     $self->{'gli'});

    # Output classifier reverse lookup, used in incremental deletion
    ####&classify::print_reverse_lookup($infodb_handle);

    # output doclist
    my @doc_list = $self->{'buildproc'}->get_doc_list();
    my %browselist_infodb = (
        'hastxt'      => [ "0" ],
        'childtype'   => [ "VList" ],
        'numleafdocs' => [ scalar(@doc_list) ],
        'thistype'    => [ "Invisible" ],
        'contains'    => [ join(";", @doc_list) ],
    );
    &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, "browselist", \%browselist_infodb);

    # STDOUT (debug mode) is not ours to close
    &dbutil::close_infodb_write_handle($self->{'infodbtype'}, $infodb_handle) if !$self->{'debug'};

    print STDERR "</Stage>\n" if $self->{'gli'};
}
442
# Collects the build information (date, build type, index stem, document
# counts, index/subcollection/language maps, indexes that were not built,
# maxnumeric, infodbtype) and writes it to the build configuration file:
# build.cfg in gs2 mode, buildConfig.xml in gs3 mode.
#
# Values a subclass has already placed in $self->{'build_cfg'} are used as
# the starting point, and build_cfg_extra() is called just before writing.
sub make_auxiliary_files {
    my ($self) = @_;

    # subclasses may have already defined stuff in here
    my $build_cfg = defined $self->{'build_cfg'} ? $self->{'build_cfg'} : {};

    my $outhandle = $self->{'outhandle'};

    print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Stage name='CreatingAuxilary'>\n" if $self->{'gli'};

    # make sure the build directory exists
    &util::mk_all_dir ($self->{'build_dir'});

    # store the build date and general build settings
    $build_cfg->{'builddate'} = time;
    $build_cfg->{'buildtype'} = $self->{'buildtype'};
    $build_cfg->{'indexstem'} = &util::get_dirsep_tail($self->{'collection'});
    $build_cfg->{'stemindexes'} = $self->{'stemindexes'};
    $build_cfg->{'separate_cjk'} = "true" if $self->{'separate_cjk'};

    # store the number of documents and number of bytes
    $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
    $build_cfg->{'numsections'} = $self->{'buildproc'}->get_num_sections();
    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();

    # store the mapping between the index names and the directory names
    # the index map is used to determine what indexes there are, so any
    # that are not built should not be put into the map.
    my @indexmap = ();
    foreach my $name (@{$self->{'index_mapping'}->{'indexmaporder'}}) {
        next if defined ($self->{'notbuilt'}->{$name});
        push (@indexmap, $name . "->" . $self->{'index_mapping'}->{'indexmap'}->{$name});
    }
    $build_cfg->{'indexmap'} = \@indexmap if scalar (@indexmap);

    my @subcollectionmap = ();
    foreach my $name (@{$self->{'index_mapping'}->{'subcollectionmaporder'}}) {
        push (@subcollectionmap, $name . "->" . $self->{'index_mapping'}->{'subcollectionmap'}->{$name});
    }
    $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);

    my @languagemap = ();
    foreach my $name (@{$self->{'index_mapping'}->{'languagemaporder'}}) {
        push (@languagemap, $name . "->" . $self->{'index_mapping'}->{'languagemap'}->{$name});
    }
    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);

    # the indexes that want_built() rejected
    my @notbuilt = keys %{$self->{'notbuilt'}};
    $build_cfg->{'notbuilt'} = \@notbuilt if scalar (@notbuilt);

    $build_cfg->{'maxnumeric'} = $self->{'maxnumeric'};

    $build_cfg->{'infodbtype'} = $self->{'infodbtype'};

    $self->build_cfg_extra($build_cfg);

    &colcfg::write_build_cfg(&util::filename_cat($self->{'build_dir'},"build.cfg"), $build_cfg)
        if ($gs_mode eq "gs2");

    &colcfg::write_build_cfg_xml(&util::filename_cat($self->{'build_dir'}, "buildConfig.xml"), $build_cfg, $self->{'collect_cfg_preserve'}, $self->{'disable_OAI'})
        if ($gs_mode eq "gs3");

    print STDERR "</Stage>\n" if $self->{'gli'};
}
520
# Hook called by make_auxiliary_files() with the build configuration hash
# just before it is written out.  The base class adds nothing; override it
# in a subclass that wants extra entries in build.cfg.
sub build_cfg_extra {
    my $self = shift;
    my ($build_cfg) = @_;

}
527
528
# Empty in the base class.
# NOTE(review): presumably a hook for collection-specific processing in
# subclasses -- nothing in this file calls it; confirm against callers.
sub collect_specific {
    my $self = shift;
}
532
# Decides whether $index should be built.
#
# Each entry of the configuration's 'dontbuild' list is used as a regular
# expression that has to match the whole index name.  On the first match
# the index is recorded in $self->{'notbuilt'} and 0 is returned; otherwise
# (or when there is no 'dontbuild' list) the result is 1.
sub want_built {
    my ($self, $index) = @_;

    my $dontbuild = $self->{'collect_cfg'}->{'dontbuild'};
    return 1 unless defined ($dontbuild);

    foreach my $checkstr (@$dontbuild) {
        next unless $index =~ /^$checkstr$/;

        $self->{'notbuilt'}->{$index} = 1;
        return 0;
    }

    return 1;
}
548
# Placeholder: the mapping from index descriptions to directory names has
# to be provided by a subclass.  The base class version reports that on
# STDERR and returns a reference to an empty hash.
sub create_index_mapping {
    my ($self, $indexes) = @_;

    print STDERR "create_index_mapping should be implemented in subclass\n";
    return {};
}
557
# Returns a two-character processed version of a field description.
#
# - If the field has two or more comma-separated components, the result is
#   the first letdig (\w) character of each of the first two components.
# - Otherwise it is the first two letdig characters of the field.
# - A missing first character is replaced by 'a', a missing second one by
#   '0' (so a field with a single letdig "x" yields "x0").
# - An undefined field, or one without any non-whitespace character,
#   yields the empty string.
sub process_field {
    my ($self, $field) = @_;

    return "" unless (defined ($field) && $field =~ /\S/);

    # plain lexicals; $a and $b are reserved for sort blocks
    my ($first, $second);
    my @components = split /,/, $field;
    if (scalar @components >= 2) {
        # pick the first letdig from the first two field names
        ($first)  = $components[0] =~ /^[^\w]*(\w)/;
        ($second) = $components[1] =~ /^[^\w]*(\w)/;
    } else {
        # pick the first two letdig chars; the second one is optional so
        # that a lone letdig is kept instead of being lost to the fallback
        ($first, $second) = $field =~ /^[^\w]*(\w)(?:[^\w]*?(\w))?/;
    }

    # there may not have been any (or enough) letdigs...
    $first  = 'a' unless defined $first;
    $second = '0' unless defined $second;

    return "$first$second";
}
587
# Advances the name referenced by $nameref to its next version, in place.
#
# A name ending in one or two digits has that trailing number incremented
# ("id3" -> "id4", "id9" -> "id10", "id09" -> "id10", "id99" -> "id100").
# A name that does not end in a digit has its last character replaced by
# "0" ("idx" -> "id0").
sub get_next_version {
    my ($self, $nameref) = @_;

    if ($$nameref =~ /(\d{1,2})$/) {
        # string increment: "9" becomes "10" and "09" becomes "10"
        my $num = $1;
        $num++;
        $$nameref =~ s/\d{1,2}$/$num/;
    } else {
        $$nameref =~ s/.$/0/;
    }
}
603
604
605
# Copies the metadata-set information gathered by the buildproc (its
# 'mdprefix_fields' hash: prefix -> field -> value) into $collection_infodb.
# For every prefix this appends
#   - the prefix to "metadataset",
#   - each of its fields to "metadatalist-<prefix>",
#   - each field's value to "metadatafreq-<prefix>-<field>".
sub get_collection_meta_sets
{
    my ($self, $collection_infodb) = @_;

    my $mdprefix_fields = $self->{'buildproc'}->{'mdprefix_fields'};
    foreach my $prefix (keys %$mdprefix_fields)
    {
        push(@{$collection_infodb->{"metadataset"}}, $prefix);

        foreach my $field (keys %{$mdprefix_fields->{$prefix}})
        {
            push(@{$collection_infodb->{"metadatalist-$prefix"}}, $field);
            push(@{$collection_infodb->{"metadatafreq-$prefix-$field"}},
                 $mdprefix_fields->{$prefix}->{$field});
        }
    }
}
625
626
# Writes the "collection" entry of the info database.  By default it holds
# the metadata sets (prefixes) used in the collection, as gathered by
# get_collection_meta_sets().
sub output_collection_meta
{
    my ($self, $infodb_handle) = @_;

    my $collection_infodb = {};
    $self->get_collection_meta_sets($collection_infodb);
    &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, "collection", $collection_infodb);
}
637
# Reads an existing build configuration file -- needed, for example, when
# the stages of building are run separately or when building incrementally.
#
# The file is build.cfg in gs2 mode and buildConfig.xml otherwise.  It is
# looked for in the build directory first and then in the "index" directory
# below $ENV{'GSDLCOLLECTDIR'}.  Returns what colcfg::read_building_cfg
# returns, or undef when neither file exists.
sub read_build_cfg {
    my ($self) = @_;

    my $buildconfigfilename = ($gs_mode eq "gs2") ? "build.cfg" : "buildConfig.xml";

    my $buildconfigfile = &util::filename_cat($self->{'build_dir'}, $buildconfigfilename);

    unless (-e $buildconfigfile) {
        # try the index dir - but do we know where it is?? try here
        $buildconfigfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "index", $buildconfigfilename);

        # we cant find a config file - just ignore the field list
        # (explicit undef kept so that list-context callers still receive
        # one element)
        return undef unless (-e $buildconfigfile);
    }

    return &colcfg::read_building_cfg( $buildconfigfile, $gs_mode);
}
665
# Prints byte statistics for the buildproc's current pass to the builder's
# output handle: a heading saying whether an index was created or text was
# compressed, the total bytes in the collection and the bytes processed.
#
# When fewer than 50 bytes were processed (and the pass was expected to
# handle text), a fresh build gets a "very little or no text" warning; a
# build that keeps old data only gets a note when nothing at all was added.
sub print_stats {
    my ($self) = @_;

    my $outhandle = $self->{'outhandle'};
    my $buildproc = $self->{'buildproc'};
    my $indexing_text = $buildproc->get_indexing_text();
    my $index = $buildproc->get_index();
    my $num_bytes = $buildproc->get_num_bytes();
    my $num_processed_bytes = $buildproc->get_num_processed_bytes();

    my $heading = $indexing_text
        ? "Stats (Creating index $index)\n"
        : "Stats (Compressing text from $index)\n";
    print $outhandle $heading;
    print $outhandle "Total bytes in collection: $num_bytes\n";
    print $outhandle "Total bytes in $index: $num_processed_bytes\n";

    if ($num_processed_bytes < 50 && ($indexing_text || !$self->{'no_text'})) {

        if (!$self->{'keepold'}) {
            # fresh build with (almost) nothing to process: warn loudly
            print $outhandle "***************\n";
            if ($indexing_text) {
                print $outhandle "WARNING: There is very little or no text to process for $index\n";
            } elsif (!$self->{'no_text'}) {
                print $outhandle "WARNING: There is very little or no text to compress\n";
            }
            print $outhandle "         Was this your intention?\n";
            print $outhandle "***************\n";
        }
        elsif ($num_processed_bytes == 0) {
            # keeping the old build: only mention that nothing was added
            if ($indexing_text) {
                print $outhandle "No additional text was added to $index\n";
            } elsif (!$self->{'no_text'}) {
                print $outhandle "No additional text was compressed\n";
            }
        }

    }

}
708
709 
7101;
711
Note: See TracBrowser for help on using the browser.