source: gsdl/trunk/perllib/basebuilder.pm@ 15728

Last change on this file since 15728 was 15728, checked in by mdewsnip, 16 years ago

(Adding new DB support) Now writes the infodbtype field to the build.cfg file so the collection server knows what type of dbclass to use with the collection.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.4 KB
RevLine 
[14930]1###########################################################################
2#
3# basebuilder.pm -- base class for collection builders
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package basebuilder;
27
28use strict;
29no strict 'refs'; # allow filehandles to be variables and viceversa
30
31use classify;
32use cfgread;
33use colcfg;
[15709]34use dbutil;
[14930]35use plugin;
36use util;
37
[15709]38
BEGIN {
    # Switch autoflush on for both standard output streams so that output
    # from mgpp doesn't get out of sync with output from the plugins.
    foreach my $stream (\*STDOUT, \*STDERR) {
        $stream->autoflush(1);
    }
}

END {
    # Switch autoflush back off on both streams at interpreter shutdown.
    foreach my $stream (\*STDOUT, \*STDERR) {
        $stream->autoflush(0);
    }
}

our $maxdocsize = 12000;

# Signifies "gs2" (the default) or "gs3".  new() overwrites it with the mode
# reported by colcfg::get_collect_cfg_name, and make_auxiliary_files reads it
# to decide which build configuration file to write.
my $gs_mode = "gs2";
55
# Constructor: creates a builder object, reads the collection configuration
# and loads the plugins and classifiers named in it.
#
# Positional arguments (after the class name):
#   $collection, $source_dir, $build_dir, $verbosity, $maxdocs, $debug,
#   $keepold, $incremental, $incremental_dlc, $remove_empty_classifications,
#   $outhandle   - defaults to STDERR when undefined
#   $no_text     - defaults to 0 when undefined
#   $failhandle  - defaults to STDERR when undefined
#   $gli         - defaults to 0 when undefined
#   $disable_OAI - defaults to 0 when undefined
#
# Side effect: sets the file-level $gs_mode from the value returned by
# colcfg::get_collect_cfg_name.
# Dies (after printing a message to $outhandle) if no plugins were loaded.
# Returns the new blessed object.
sub new {
    my $class = shift(@_);
    my ($collection, $source_dir, $build_dir, $verbosity,
        $maxdocs, $debug, $keepold, $incremental, $incremental_dlc,
        $remove_empty_classifications,
        $outhandle, $no_text, $failhandle, $gli, $disable_OAI) = @_;

    # defaults for the optional trailing arguments
    if (!defined $outhandle)  { $outhandle  = *STDERR; }
    if (!defined $no_text)    { $no_text    = 0; }
    if (!defined $failhandle) { $failhandle = *STDERR; }

    # create a builder object
    my %fields = (
        'collection'                   => $collection,
        'source_dir'                   => $source_dir,
        'build_dir'                    => $build_dir,
        'verbosity'                    => $verbosity,
        'maxdocs'                      => $maxdocs,
        'debug'                        => $debug,
        'keepold'                      => $keepold,
        'incremental'                  => $incremental,
        'incremental_dlc'              => $incremental_dlc,
        'remove_empty_classifications' => $remove_empty_classifications,
        'outhandle'                    => $outhandle,
        'no_text'                      => $no_text,
        'failhandle'                   => $failhandle,
        'notbuilt'                     => {},    # indexes not built
        'gli'                          => $gli,
        'disable_OAI'                  => $disable_OAI,
    );
    my $self = bless \%fields, $class;

    if (!defined $self->{'gli'}) {
        $self->{'gli'} = 0;
    }

    # disable_OAI applies to greenstone 3 only and is only passed to
    # &colcfg::write_build_cfg_xml (then cfgread4gs3::write_cfg_file) when
    # writing the buildConfig.xml
    if (!defined $self->{'disable_OAI'}) {
        $self->{'disable_OAI'} = 0;
    }

    # Read in the collection configuration file.
    my $colcfgname;
    ($colcfgname, $gs_mode) = &colcfg::get_collect_cfg_name($outhandle);
    if ($gs_mode eq "gs3") {
        $self->{'collect_cfg'} = &colcfg::read_collection_cfg_xml($colcfgname);

        # 'collect_cfg_preserve' is used for gs3 only; it is passed to
        # &colcfg::write_build_cfg_xml in make_auxiliary_files.  A second,
        # untouched copy is kept because $self->{'collect_cfg'}->{'classify'}
        # gets modified during the call to &classify::load_classifiers.
        $self->{'collect_cfg_preserve'} = &colcfg::read_collection_cfg_xml($colcfgname);
    }
    elsif ($gs_mode eq "gs2") {
        $self->{'collect_cfg'} = &colcfg::read_collect_cfg($colcfgname);
    }

    # database type for this collection: taken from the configuration when it
    # is set there, otherwise the dbutil default
    $self->{'infodbtype'} = $self->{'collect_cfg'}->{'infodbtype'}
        || &dbutil::get_default_infodb_type();

    # the list of plugins for this collection (empty when none configured)
    my $plugins = defined $self->{'collect_cfg'}->{'plugin'}
        ? $self->{'collect_cfg'}->{'plugin'}
        : [];

    # build up the extra global options for the plugins
    my @global_opts = ();
    if (defined $self->{'collect_cfg'}->{'separate_cjk'}
        && $self->{'collect_cfg'}->{'separate_cjk'} =~ /^true$/i) {
        push(@global_opts, "-separate_cjk");
    }

    # load all the plugins
    $self->{'pluginfo'} = &plugin::load_plugins($plugins, $verbosity, $outhandle,
                                                $failhandle, \@global_opts, $keepold);

    unless (scalar(@{$self->{'pluginfo'}})) {
        print $outhandle "No plugins were loaded.\n";
        die "\n";
    }

    # the list of classifiers for this collection (empty when none configured)
    my $classifiers = defined $self->{'collect_cfg'}->{'classify'}
        ? $self->{'collect_cfg'}->{'classify'}
        : [];

    # load all the classifiers
    $self->{'classifiers'} = &classify::load_classifiers($classifiers, $build_dir, $outhandle);

    # load up any dontdb fields (configured under the 'dontgdbm' key)
    $self->{'dontdb'} = {};
    if (defined $self->{'collect_cfg'}->{'dontgdbm'}) {
        foreach my $field (@{$self->{'collect_cfg'}->{'dontgdbm'}}) {
            $self->{'dontdb'}->{$field} = 1;
        }
    }

    $self->{'maxnumeric'} = 4;
    return $self;
}
145
# Second-stage initialisation.  This was moved out of new() so that subclass
# methods (generate_index_list, generate_index_options, default_buildproc)
# can be used.
#
# Steps:
#   1. lets the subclass generate the index list and index options;
#   2. expands the configured indexes by subcollection and by language;
#   3. removes duplicate index specifications (an undefined index list
#      becomes an empty one);
#   4. loads the buildproc class and creates $self->{'buildproc'};
#   5. unless debugging or keeping the old build, removes the build
#      directory and recreates it together with its "text" subdirectory.
#
# Dies if the buildproc module cannot be loaded or its constructor dies.
sub init {
    my $self = shift(@_);

    $self->generate_index_list();
    $self->generate_index_options();

    # sort out subcollection indexes
    if (defined $self->{'collect_cfg'}->{'indexsubcollections'}) {
        my $base_indexes = $self->{'collect_cfg'}->{'indexes'};
        $self->{'collect_cfg'}->{'indexes'} = [];
        foreach my $subcollection (@{$self->{'collect_cfg'}->{'indexsubcollections'}}) {
            foreach my $index (@$base_indexes) {
                push(@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$subcollection");
            }
        }
    }

    # sort out language subindexes
    if (defined $self->{'collect_cfg'}->{'languages'}) {
        my $base_indexes = $self->{'collect_cfg'}->{'indexes'};
        $self->{'collect_cfg'}->{'indexes'} = [];
        foreach my $language (@{$self->{'collect_cfg'}->{'languages'}}) {
            foreach my $index (@$base_indexes) {
                if (defined $self->{'collect_cfg'}->{'indexsubcollections'}) {
                    # the index name already carries its subcollection part
                    push(@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$language");
                }
                else {
                    # add in an empty subcollection field
                    push(@{$self->{'collect_cfg'}->{'indexes'}}, $index . "::" . $language);
                }
            }
        }
    }

    if (defined $self->{'collect_cfg'}->{'indexes'}) {
        # make sure that the same index isn't specified more than once,
        # keeping the first occurrence of each
        my %seen = ();
        my @unique = grep { !$seen{$_}++ } @{$self->{'collect_cfg'}->{'indexes'}};
        $self->{'collect_cfg'}->{'indexes'} = \@unique;
    }
    else {
        $self->{'collect_cfg'}->{'indexes'} = [];
    }

    # load up the document processor for building
    # if a buildproc class has been created for this collection, use it
    # otherwise, use the default buildproc named by the subclass
    my ($buildprocdir, $buildproctype);
    my $collection = $self->{'collection'};
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuildproc.pm") {
        $buildprocdir  = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildproctype = "custombuildproc";
    }
    elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuildproc.pm") {
        $buildprocdir  = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "custombuildproc";
    }
    elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}buildproc.pm") {
        $buildprocdir  = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "${collection}buildproc";
    }
    else {
        $buildprocdir  = "$ENV{'GSDLHOME'}/perllib";
        $buildproctype = $self->default_buildproc();
    }
    require "$buildprocdir/$buildproctype.pm";

    # The class name is only known at run time, but a string can be used
    # directly as the invocant of a method call, so no string eval (and no
    # indirect-object "new Class(...)" syntax) is needed.  Any exception
    # raised by the constructor simply propagates to our caller.
    $self->{'buildproc'} = $buildproctype->new($self->{'collection'},
                                               $self->{'source_dir'},
                                               $self->{'build_dir'},
                                               $self->{'keepold'},
                                               $self->{'verbosity'},
                                               $self->{'outhandle'});

    if (!$self->{'debug'} && !$self->{'keepold'}) {
        # remove any old builds
        &util::rm_r($self->{'build_dir'});
        &util::mk_all_dir($self->{'build_dir'});

        # make the text directory
        my $textdir = "$self->{'build_dir'}/text";
        &util::mk_all_dir($textdir);
    }

}
230
# Hands the loaded plugins and the document processor to plugin::deinit.
sub deinit {
    my ($self) = @_;

    my $pluginfo  = $self->{'pluginfo'};
    my $buildproc = $self->{'buildproc'};
    &plugin::deinit($pluginfo, $buildproc);
}
236
# Forwards $index to the document processor's method of the same name.
sub set_sections_index_document_metadata {
    my ($self, $index) = @_;

    my $buildproc = $self->{'buildproc'};
    $buildproc->set_sections_index_document_metadata($index);
}
243
# Stores $maxnumeric on the builder (new() initialises it to 4).
sub set_maxnumeric {
    my ($self, $maxnumeric) = @_;

    $self->{'maxnumeric'} = $maxnumeric;
}
# Records the strip_html setting on the builder and passes the same value on
# to the document processor.
sub set_strip_html {
    my ($self, $strip) = @_;

    $self->{'strip_html'} = $strip;

    my $buildproc = $self->{'buildproc'};
    $buildproc->set_strip_html($strip);
}
257
# Placeholder: subclasses are expected to override this.  The base version
# only reports on STDERR that it was called and returns nothing.
#
#   $textindex - accepted for interface compatibility; unused here
sub compress_text {
    my $self = shift (@_);
    my ($textindex) = @_;

    # newline-terminated, like the other "implement in subclass" notices in
    # this file, so the message doesn't run into whatever is printed next
    print STDERR "compress_text() should be implemented in subclass!!\n";
    return;
}
265
266
# Builds either the single index named by $indexname (when it contains at
# least one word character) or every index listed in the collection
# configuration, then calls build_indexes_extra().
#
# Indexes rejected by want_built() are skipped.  Progress messages go to the
# builder's outhandle when verbosity >= 1; a <Stage> marker goes to STDERR
# when running under GLI.
sub build_indexes {
    my ($self, $indexname) = @_;
    my $outhandle = $self->{'outhandle'};

    my $indexes;
    if (defined $indexname && $indexname =~ /\w/) {
        $indexes = [ $indexname ];
    }
    else {
        $indexes = $self->{'collect_cfg'}->{'indexes'};
    }

    # create the mapping between the index descriptions
    # and their directory names (includes subcolls and langs)
    $self->{'index_mapping'} = $self->create_index_mapping($indexes);

    # build each of the indexes
    foreach my $index (@$indexes) {
        if (!$self->want_built($index)) {
            if ($self->{'verbosity'} >= 1) {
                print $outhandle "\n*** ignoring index $index\n";
            }
            next;
        }

        if ($self->{'verbosity'} >= 1) {
            print $outhandle "\n*** building index $index in subdirectory " .
                "$self->{'index_mapping'}->{$index}\n";
        }
        if ($self->{'gli'}) {
            print STDERR "<Stage name='Index' source='$index'>\n";
        }
        $self->build_index($index);
    }

    $self->build_indexes_extra();

}
298
# Hook called at the end of build_indexes().  Does nothing in the base class.
sub build_indexes_extra {
    my $self = shift;

}
303
# Placeholder for building a single index: subclasses are expected to
# override it.  The base version only reports on STDERR and returns nothing.
sub build_index {
    my ($self, $index) = @_;

    print STDERR "build_index should be implemented in subclass\n";
    return;
}
311
312
313
# Creates the collection's info database and processes associated files.
#
# Outline:
#   - makes sure the "text" and "assoc" directories exist under build_dir;
#   - when keepold is set, reconstructs the metadata of all existing
#     documents from the current database;
#   - opens a write handle on the info database (STDOUT when debugging);
#   - configures the document processor for 'infodb' mode and resets it;
#   - feeds the reconstructed documents (keepold only), then everything the
#     plugins read from source_dir, through the document processor;
#   - writes the collection metadata, the classification information and
#     the "browselist" entry, then closes the handle (unless debugging).
#
# Dies if the info database write handle cannot be opened.
sub make_infodatabase {
    my ($self) = @_;
    my $outhandle = $self->{'outhandle'};
    my $buildproc = $self->{'buildproc'};

    print STDERR "BuildDir: $self->{'build_dir'}\n";

    my $textdir = &util::filename_cat($self->{'build_dir'}, "text");
    my $assocdir = &util::filename_cat($self->{'build_dir'}, "assoc");
    &util::mk_all_dir($textdir);
    &util::mk_all_dir($assocdir);

    # Get info database file path
    my $infodb_file_path = &dbutil::get_infodb_file_path($self->{'infodbtype'},
                                                         $self->{'collection'},
                                                         $textdir);

    if ($self->{'verbosity'} >= 1) {
        print $outhandle "\n*** creating the info database and processing associated files\n";
    }
    if ($self->{'gli'}) {
        print STDERR "<Stage name='CreateInfoData'>\n";
    }

    # init all the classifiers
    &classify::init_classifiers($self->{'classifiers'});

    my $reconstructed_docs = undef;
    if ($self->{'keepold'}) {
        # reconstruct doc_obj metadata from database for all docs
        $reconstructed_docs = &classify::reconstruct_doc_objs_metadata($self->{'infodbtype'},
                                                                        $infodb_file_path);
    }

    # set up the document processor: debugging output goes to STDOUT,
    # otherwise a real write handle on the info database is needed
    my $infodb_handle;
    if (!$self->{'debug'}) {
        $infodb_handle = &dbutil::open_infodb_write_handle($self->{'infodbtype'},
                                                           $infodb_file_path);
        unless (defined($infodb_handle)) {
            if ($self->{'gli'}) {
                print STDERR "<FatalError name='NoRunText2DB'/>\n</Stage>\n";
            }
            die "builder::make_infodatabase - couldn't open infodb write handle\n";
        }
    }
    else {
        $infodb_handle = *STDOUT;
    }

    $buildproc->set_infodbtype($self->{'infodbtype'});
    $buildproc->set_output_handle($infodb_handle);
    $buildproc->set_mode('infodb');
    $buildproc->set_assocdir($assocdir);
    $buildproc->set_dontdb($self->{'dontdb'});
    $buildproc->set_classifiers($self->{'classifiers'});
    $buildproc->set_indexing_text(0);
    $buildproc->set_store_text(1);

    # make_infodatabase needs full reset even for incremental build
    # as incremental works by reconstructing all docs from the database and
    # then adding in the new ones
    $buildproc->zero_reset();

    $buildproc->{'mdprefix_fields'} = {};

    if ($self->{'keepold'}) {
        # create flat classify structure, ready for new docs to be added
        foreach my $doc_obj (@$reconstructed_docs) {
            print $outhandle " Adding reconstructed ", $doc_obj->get_OID(), " into classify structures\n";
            $buildproc->process($doc_obj, undef);
        }
    }

    &plugin::read($self->{'pluginfo'}, $self->{'source_dir'},
                  "", {}, $buildproc, $self->{'maxdocs'}, 0, $self->{'gli'});

    # this has changed to only output collection meta if its
    # not in the config file
    $self->output_collection_meta($infodb_handle);

    # output classification information
    &classify::output_classify_info($self->{'classifiers'}, $self->{'infodbtype'},
                                    $infodb_handle,
                                    $self->{'remove_empty_classifications'},
                                    $self->{'gli'});

    # Output classifier reverse lookup, used in incremental deletion
    #&classify::print_reverse_lookup($infodb_handle);

    # output doclist
    my @doc_list = $buildproc->get_doc_list();
    my $browselist_infodb = {
        'hastxt'      => [ "0" ],
        'childtype'   => [ "VList" ],
        'numleafdocs' => [ scalar(@doc_list) ],
        'thistype'    => [ "Invisible" ],
        'contains'    => [ join(";", @doc_list) ],
    };
    &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle,
                                "browselist", $browselist_infodb);

    # STDOUT (debug mode) is left open
    unless ($self->{'debug'}) {
        close($infodb_handle);
    }

    print STDERR "</Stage>\n" if $self->{'gli'};
}
408
# Writes the build configuration file for the collection: build.cfg in gs2
# mode, buildConfig.xml in gs3 mode (as selected by the file-level $gs_mode).
#
# The configuration starts from $self->{'build_cfg'} when a subclass has
# already put something there, and is then filled with the build date and
# type, index stem, stem indexes, document/section/byte counts, the index,
# subcollection and language maps, the list of indexes that were not built,
# maxnumeric and infodbtype.  build_cfg_extra() is called last so that
# subclasses can add further entries before the file is written.
sub make_auxiliary_files {
    my $self = shift (@_);

    # subclasses may have already defined stuff in here
    my $build_cfg = {};
    if (defined $self->{'build_cfg'}) {
        $build_cfg = $self->{'build_cfg'};
    }

    my $outhandle = $self->{'outhandle'};

    print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Stage name='CreatingAuxilary'>\n" if $self->{'gli'};

    # make sure the build directory exists
    &util::mk_all_dir ($self->{'build_dir'});

    # store the build date
    $build_cfg->{'builddate'} = time;
    $build_cfg->{'buildtype'} = $self->{'buildtype'};
    $build_cfg->{'indexstem'} = &util::get_dirsep_tail($self->{'collection'});
    $build_cfg->{'stemindexes'} = $self->{'stemindexes'};

    # store the number of documents and number of bytes
    $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
    $build_cfg->{'numsections'} = $self->{'buildproc'}->get_num_sections();
    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();

    # store the mapping between the index names and the directory names
    # the index map is used to determine what indexes there are, so any that
    # are not built should not be put into the map.
    my @indexmap = ();
    foreach my $index (@{$self->{'index_mapping'}->{'indexmaporder'}}) {
        next if defined ($self->{'notbuilt'}->{$index});
        push (@indexmap, $index . "->" . $self->{'index_mapping'}->{'indexmap'}->{$index});
    }
    $build_cfg->{'indexmap'} = \@indexmap if scalar (@indexmap);

    my @subcollectionmap = ();
    foreach my $subcollection (@{$self->{'index_mapping'}->{'subcollectionmaporder'}}) {
        push (@subcollectionmap, $subcollection . "->" .
              $self->{'index_mapping'}->{'subcollectionmap'}->{$subcollection});
    }
    $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);

    my @languagemap = ();
    foreach my $language (@{$self->{'index_mapping'}->{'languagemaporder'}}) {
        push (@languagemap, $language . "->" .
              $self->{'index_mapping'}->{'languagemap'}->{$language});
    }
    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);

    # names of the indexes that want_built() rejected
    my @notbuilt = keys %{$self->{'notbuilt'}};
    $build_cfg->{'notbuilt'} = \@notbuilt if scalar (@notbuilt);

    $build_cfg->{'maxnumeric'} = $self->{'maxnumeric'};

    $build_cfg->{'infodbtype'} = $self->{'infodbtype'};

    $self->build_cfg_extra($build_cfg);

    if ($gs_mode eq "gs2") {
        &colcfg::write_build_cfg("$self->{'build_dir'}/build.cfg", $build_cfg);
    }
    elsif ($gs_mode eq "gs3") {
        &colcfg::write_build_cfg_xml("$self->{'build_dir'}/buildConfig.xml", $build_cfg,
                                     $self->{'collect_cfg_preserve'}, $self->{'disable_OAI'});
    }

    print STDERR "</Stage>\n" if $self->{'gli'};
}
483
# Hook that does nothing in the base class.
sub collect_specific {
    my $self = shift;
}
487
# Decides whether $index should be built.
#
# Each entry of the collection configuration's 'dontbuild' list is used as a
# regular expression that must match the whole index name.  On the first
# match the index is recorded in $self->{'notbuilt'} and 0 is returned;
# when nothing matches (or no list is configured) 1 is returned.
sub want_built {
    my ($self, $index) = @_;

    my $dontbuild = $self->{'collect_cfg'}->{'dontbuild'};
    if (defined ($dontbuild)) {
        foreach my $pattern (@$dontbuild) {
            next unless $index =~ /^$pattern$/;

            $self->{'notbuilt'}->{$index} = 1;
            return 0;
        }
    }

    return 1;
}
503
# Placeholder for creating the index-description to directory-name mapping:
# subclasses are expected to override it.  The base version reports on
# STDERR and returns a reference to an empty hash.
sub create_index_mapping {
    my ($self, $indexes) = @_;

    print STDERR "create_index_mapping should be implemented in subclass\n";

    my $mapping = {};
    return $mapping;
}
512
# Returns a two-character processed version of a field.
#
# Only letdig (\w) characters are used:
#   - if the field has two or more comma-separated components, the result is
#     the first letdig of the first component followed by the first letdig
#     of the second component;
#   - otherwise it is the first two letdigs of the field.
# A missing first character is replaced by 'a' and a missing second
# character by '0'.  An undefined or all-whitespace field gives "".
sub process_field {
    my $self = shift (@_);
    my ($field) = @_;

    return "" unless (defined ($field) && $field =~ /\S/);

    # NB: deliberately not called $a and $b, which are reserved for sort
    my ($first, $second);
    my @components = split /,/, $field;
    if (scalar @components >= 2) {
        # pick the first letdig from the first two field names
        ($first) = $components[0] =~ /^[^\w]*(\w)/;
        ($second) = $components[1] =~ /^[^\w]*(\w)/;
    } else {
        # pick the first two letdig chars; the second one is optional so
        # that a field with a single letdig still contributes that character
        if ($field =~ /^[^\w]*(\w)(?:[^\w]*(\w))?/) {
            ($first, $second) = ($1, $2);
        }
    }
    # there may not have been any letdigs...
    $first = 'a' unless defined $first;
    $second = '0' unless defined $second;

    return "$first$second";

}
542
# Advances the name referenced by $nameref, in place, to its next version:
#   - a name ending in two digits has those two digits replaced by their
#     incremented value;
#   - a name ending in a single digit has that digit replaced by the next
#     number (so a trailing 9 becomes 10);
#   - any other name has its last character replaced by 0.
sub get_next_version {
    my ($self, $nameref) = @_;

    if ($$nameref =~ /(\d\d)$/) {
        # ++ is applied to the captured text itself, not to a sum
        my $counter = $1;
        $counter++;
        $$nameref =~ s/\d\d$/$counter/;
    }
    elsif ($$nameref =~ /(\d)$/) {
        my $counter = $1 + 1;
        $$nameref =~ s/\d$/$counter/;
    }
    else {
        $$nameref =~ s/.$/0/;
    }
}
558
# Hook called by make_auxiliary_files() just before the build configuration
# is written.  Override it in a subclass to add extra entries to $build_cfg;
# the base version adds nothing.
sub build_cfg_extra {
    my $self = shift;
    my ($build_cfg) = @_;

}
565
[14934]566
# Adds the metadata sets recorded by the document processor to
# $collection_infodb (a hash reference, modified in place).
#
# For every prefix in the document processor's 'mdprefix_fields':
#   "metadataset"                   gets the prefix appended;
#   "metadatalist-<prefix>"         gets each field name appended;
#   "metadatafreq-<prefix>-<field>" gets the value stored for that field.
sub get_collection_meta_sets
{
    my ($self, $collection_infodb) = @_;

    my $mdprefix_fields = $self->{'buildproc'}->{'mdprefix_fields'};
    for my $prefix (keys %$mdprefix_fields)
    {
        push(@{$collection_infodb->{"metadataset"}}, $prefix);

        for my $field (keys %{$mdprefix_fields->{$prefix}})
        {
            my $list_key = "metadatalist-$prefix";
            my $freq_key = "metadatafreq-$prefix-$field";

            push(@{$collection_infodb->{$list_key}}, $field);
            push(@{$collection_infodb->{$freq_key}}, $mdprefix_fields->{$prefix}->{$field});
        }
    }
}
[14934]586
587
# Writes the "collection" entry to the info database through $infodb_handle.
# By default the entry holds the metadata sets (prefixes) used in the
# collection, as gathered by get_collection_meta_sets().
sub output_collection_meta
{
    my ($self, $infodb_handle) = @_;

    my $collection_infodb = {};
    $self->get_collection_meta_sets($collection_infodb);

    &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle,
                                "collection", $collection_infodb);
}
[14934]598
599
# Prints a short statistics report for the current document processor pass
# to the builder's outhandle.
#
# When fewer than 50 bytes were processed, and text is either being indexed
# or is not disabled via 'no_text':
#   - with keepold set, a note is printed only if nothing at all was added;
#   - otherwise a warning block asks whether so little text was intended.
sub print_stats {
    my ($self) = @_;

    my $outhandle       = $self->{'outhandle'};
    my $buildproc       = $self->{'buildproc'};
    my $indexing_text   = $buildproc->get_indexing_text();
    my $index           = $buildproc->get_index();
    my $total_bytes     = $buildproc->get_num_bytes();
    my $processed_bytes = $buildproc->get_num_processed_bytes();

    my $activity = $indexing_text
        ? "Creating index $index"
        : "Compressing text from $index";
    print $outhandle "Stats ($activity)\n";
    print $outhandle "Total bytes in collection: $total_bytes\n";
    print $outhandle "Total bytes in $index: $processed_bytes\n";

    if ($processed_bytes < 50 && ($indexing_text || !$self->{'no_text'})) {

        if (!$self->{'keepold'}) {
            my $banner = "***************\n";
            print $outhandle $banner;
            if ($indexing_text) {
                print $outhandle "WARNING: There is very little or no text to process for $index\n";
            }
            elsif (!$self->{'no_text'}) {
                print $outhandle "WARNING: There is very little or no text to compress\n";
            }
            print $outhandle " Was this your intention?\n";
            print $outhandle $banner;
        }
        else {
            # incremental build: only worth mentioning when nothing was added
            if ($processed_bytes == 0) {
                if ($indexing_text) {
                    print $outhandle "No additional text was added to $index\n";
                }
                elsif (!$self->{'no_text'}) {
                    print $outhandle "No additional text was compressed\n";
                }
            }
        }

    }

}
642
643
6441;
645
Note: See TracBrowser for help on using the repository browser.