source: gsdl/trunk/perllib/basebuilder.pm@ 17110

Last change on this file since 17110 was 17110, checked in by kjdon, 16 years ago

Changed the way CJK separation is done. It is no longer done in plugins, but is now an indexoption. cnseg is called from the filter_text method, and generate_index_options sets up the field in buildproc.

  • Property svn:keywords set to Author Date Id Revision
File size: 21.1 KB
Line 
1###########################################################################
2#
3# basebuilder.pm -- base class for collection builders
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package basebuilder;
27
28use strict;
29no strict 'refs'; # allow filehandles to be variables and viceversa
30
31use classify;
32use cfgread;
33use colcfg;
34use dbutil;
35use plugin;
36use util;
37
38
BEGIN {
    # autoflush() is an IO::Handle method; load the module explicitly so
    # this block does not rely on some other module having pulled it in.
    require IO::Handle;

    # set autoflush on for STDERR and STDOUT so that mgpp
    # doesn't get out of sync with plugins
    STDOUT->autoflush(1);
    STDERR->autoflush(1);
}

END {
    # switch autoflush back off when the program finishes
    STDOUT->autoflush(0);
    STDERR->autoflush(0);
}

# NOTE(review): not referenced anywhere else in the visible part of this
# file; presumably read by other code as $basebuilder::maxdocsize - confirm.
our $maxdocsize = 12000;

# used to signify "gs2"(default) or "gs3"
# This is a file-scoped lexical, so it is shared by every builder object:
# new() overwrites it and make_auxiliary_files() reads it.
my $gs_mode = "gs2";
55
# Constructor.
#
# Positional arguments (after the class name): collection, source_dir,
# build_dir, verbosity, maxdocs, debug, keepold, incremental,
# remove_empty_classifications, outhandle, no_text, failhandle, gli,
# disable_OAI.  outhandle and failhandle default to STDERR; no_text, gli
# and disable_OAI default to 0.
#
# Reads the collection configuration, loads the plugins and classifiers
# it names, and dies if no plugin could be loaded.
sub new {
    my ($class, $collection, $source_dir, $build_dir, $verbosity,
        $maxdocs, $debug, $keepold, $incremental,
        $remove_empty_classifications,
        $outhandle, $no_text, $failhandle, $gli, $disable_OAI) = @_;

    # defaults for the optional arguments
    $outhandle  = *STDERR unless defined $outhandle;
    $no_text    = 0       unless defined $no_text;
    $failhandle = *STDERR unless defined $failhandle;
    $gli        = 0       unless defined $gli;

    # disable_OAI applies to greenstone 3 only and is only passed to
    # &colcfg::write_build_cfg_xml (then cfgread4gs3::write_cfg_file) when
    # writing the buildConfig.xml
    $disable_OAI = 0 unless defined $disable_OAI;

    # create a builder object
    my %fields = (
        'collection'                   => $collection,
        'source_dir'                   => $source_dir,
        'build_dir'                    => $build_dir,
        'verbosity'                    => $verbosity,
        'maxdocs'                      => $maxdocs,
        'debug'                        => $debug,
        'keepold'                      => $keepold,
        'incremental'                  => $incremental,
        'remove_empty_classifications' => $remove_empty_classifications,
        'outhandle'                    => $outhandle,
        'no_text'                      => $no_text,
        'failhandle'                   => $failhandle,
        'notbuilt'                     => {},    # indexes not built
        'gli'                          => $gli,
        'disable_OAI'                  => $disable_OAI,
    );
    my $self = bless \%fields, $class;

    # Read in the collection configuration file.
    my $colcfgname;
    ($colcfgname, $gs_mode) = &colcfg::get_collect_cfg_name($outhandle);
    if ($gs_mode eq "gs2") {
        $self->{'collect_cfg'} = &colcfg::read_collect_cfg ($colcfgname);
    }
    elsif ($gs_mode eq "gs3") {
        $self->{'collect_cfg'} = &colcfg::read_collection_cfg_xml ($colcfgname);

        # 'collect_cfg_preserve' is used for gs3 only and is passed to
        # &colcfg::write_build_cfg_xml in sub make_auxiliary_files later
        # in this file.  A second, untouched copy is kept because
        # $self->{'collect_cfg'}->{'classify'} gets modified during the
        # call to &classify::load_classifiers.
        $self->{'collect_cfg_preserve'} = &colcfg::read_collection_cfg_xml ($colcfgname);
    }

    # get the database type for this collection from the collect.cfg file (may be undefined)
    $self->{'infodbtype'} = $self->{'collect_cfg'}->{'infodbtype'} || &dbutil::get_default_infodb_type();

    my $cfg = $self->{'collect_cfg'};

    # get the list of plugins for this collection
    my $plugins = defined $cfg->{'plugin'} ? $cfg->{'plugin'} : [];

    # build up the extra global options for the plugins
    my @global_opts = ();
    push @global_opts, "-separate_cjk"
        if (defined $cfg->{'separate_cjk'} && $cfg->{'separate_cjk'} =~ /^true$/i);

    # load all the plugins
    $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity, $outhandle, $failhandle, \@global_opts, $keepold);

    unless (scalar(@{$self->{'pluginfo'}})) {
        print $outhandle "No plugins were loaded.\n";
        die "\n";
    }

    # get the list of classifiers for this collection
    my $classifiers = defined $cfg->{'classify'} ? $cfg->{'classify'} : [];

    # load all the classifiers
    $self->{'classifiers'} = &classify::load_classifiers ($classifiers, $build_dir, $outhandle);

    # load up any dontdb fields
    $self->{'dontdb'} = {};
    if (defined $cfg->{'dontgdbm'}) {
        $self->{'dontdb'}->{$_} = 1 foreach @{$cfg->{'dontgdbm'}};
    }

    $self->{'maxnumeric'} = 4;
    return $self;
}
144
# stuff has been moved here from new, so we can use subclass methods
#
# Second-stage initialisation, called on an already constructed builder:
#   1. asks the subclass for its index list (generate_index_list),
#   2. expands the index list by subcollection and by language,
#   3. removes duplicate index specifications (first occurrence wins),
#   4. locates, loads and instantiates the buildproc class,
#   5. applies the index options (generate_index_options),
#   6. unless debugging or keeping the old build, wipes and recreates
#      the build directory and its text subdirectory.
# Dies if the buildproc module cannot be loaded or constructed.
sub init {
    my $self = shift(@_);

    $self->generate_index_list();

    my $cfg = $self->{'collect_cfg'};

    # sort out subcollection indexes
    if (defined $cfg->{'indexsubcollections'}) {
        my $indexes = $cfg->{'indexes'};
        $cfg->{'indexes'} = [];
        foreach my $subcollection (@{$cfg->{'indexsubcollections'}}) {
            foreach my $index (@$indexes) {
                push (@{$cfg->{'indexes'}}, "$index:$subcollection");
            }
        }
    }

    # sort out language subindexes
    if (defined $cfg->{'languages'}) {
        my $indexes = $cfg->{'indexes'};
        $cfg->{'indexes'} = [];
        # when there are no subcollections, an empty subcollection field
        # is added so the language always ends up in the third position
        my $separator = defined ($cfg->{'indexsubcollections'}) ? ":" : "::";
        foreach my $language (@{$cfg->{'languages'}}) {
            foreach my $index (@$indexes) {
                push (@{$cfg->{'indexes'}}, $index . $separator . $language);
            }
        }
    }

    if (defined($cfg->{'indexes'})) {
        # make sure that the same index isn't specified more than once
        my %seen = ();
        $cfg->{'indexes'} = [ grep { !$seen{$_}++ } @{$cfg->{'indexes'}} ];
    } else {
        $cfg->{'indexes'} = [];
    }

    # load up the document processor for building
    # if a buildproc class has been created for this collection, use it
    # otherwise, use the default buildproc of the subclass
    my ($buildprocdir, $buildproctype);
    my $collection = $self->{'collection'};
    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
        $buildproctype = "custombuildproc";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "custombuildproc";
    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}buildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "${collection}buildproc";
    } else {
        $buildprocdir = "$ENV{'GSDLHOME'}/perllib";
        $buildproctype = $self->default_buildproc();
    }
    require "$buildprocdir/$buildproctype.pm";

    # Call the constructor directly on the class name.  This replaces a
    # string eval of "new $buildproctype(...)", which depended on Perl's
    # indirect-object parsing heuristics and compiled the interpolated
    # collection name as code.  A constructor failure still dies.
    $self->{'buildproc'} = $buildproctype->new($self->{'collection'},
                                               $self->{'source_dir'},
                                               $self->{'build_dir'},
                                               $self->{'keepold'},
                                               $self->{'verbosity'},
                                               $self->{'outhandle'});

    $self->generate_index_options();

    if (!$self->{'debug'} && !$self->{'keepold'}) {
        # remove any old builds
        &util::rm_r($self->{'build_dir'});
        &util::mk_all_dir($self->{'build_dir'});

        # make the text directory
        my $textdir = "$self->{'build_dir'}/text";
        &util::mk_all_dir($textdir);
    }

}
231
# Hands the loaded plugins and the buildproc to plugin::deinit.
sub deinit {
    my ($self) = @_;

    return &plugin::deinit($self->{'pluginfo'}, $self->{'buildproc'});
}
237
# Works out whether CJK separation was requested through the collection's
# 'indexoptions' (any option containing "separate_cjk" switches it on),
# passes the resulting 0/1 flag to the buildproc and remembers it on the
# builder so it can be written to build.cfg later.
sub generate_index_options {
    my ($self) = @_;

    my $indexoptions = $self->{'collect_cfg'}->{'indexoptions'};

    my $separate_cjk = 0;
    if (defined($indexoptions)) {
        $separate_cjk = 1 if grep { $_ =~ /separate_cjk/ } @$indexoptions;
    }

    # set this for building
    $self->{'buildproc'}->set_separate_cjk($separate_cjk);
    # record it for build.cfg
    $self->{'separate_cjk'} = $separate_cjk;
}
255
# Forwards the setting unchanged to the buildproc's method of the same name.
sub set_sections_index_document_metadata {
    my ($self, $index) = @_;

    return $self->{'buildproc'}->set_sections_index_document_metadata($index);
}
262
# Stores the 'maxnumeric' setting on the builder (new() initialises it
# to 4); the value is later copied into the build configuration.
sub set_maxnumeric {
    my ($self, $maxnumeric) = @_;

    $self->{'maxnumeric'} = $maxnumeric;
}
# Records the strip_html flag on the builder and passes it on to the
# buildproc.
sub set_strip_html {
    my ($self, $strip) = @_;

    $self->{'strip_html'} = $strip;
    return $self->{'buildproc'}->set_strip_html($strip);
}
276
# Placeholder: text compression must be provided by the subclass.
# This base version only reports that on STDERR and returns nothing.
#   $textindex - accepted for interface compatibility, unused here
sub compress_text {
    my $self = shift (@_);
    my ($textindex) = @_;

    # terminate the message with a newline, like the other "implement in
    # subclass" stubs in this file, so later output starts on a fresh line
    print STDERR "compress_text() should be implemented in subclass!!\n";
    return;
}
284
285
# Builds either the single index named by $indexname (when it contains at
# least one word character) or every index listed in the collection
# configuration.  Indexes rejected by want_built() are skipped.
# Finishes by calling the build_indexes_extra() hook.
sub build_indexes {
    my ($self, $indexname) = @_;
    my $outhandle = $self->{'outhandle'};

    my $indexes = (defined $indexname && $indexname =~ /\w/)
        ? [ $indexname ]
        : $self->{'collect_cfg'}->{'indexes'};

    # create the mapping between the index descriptions
    # and their directory names (includes subcolls and langs)
    $self->{'index_mapping'} = $self->create_index_mapping ($indexes);

    # build each of the indexes
    foreach my $index (@$indexes) {
        unless ($self->want_built($index)) {
            print $outhandle "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);
            next;
        }

        print $outhandle "\n*** building index $index in subdirectory " .
            "$self->{'index_mapping'}->{$index}\n" if ($self->{'verbosity'} >= 1);
        print STDERR "<Stage name='Index' source='$index'>\n" if $self->{'gli'};
        $self->build_index($index);
    }

    $self->build_indexes_extra();

}
317
# Hook called at the end of build_indexes(); does nothing in this base
# class, so subclasses can override it to do extra work.
sub build_indexes_extra {
    my $self = shift;

}
322
# Placeholder: building a single index must be provided by the subclass.
# This base version only reports that on STDERR and returns nothing.
sub build_index {
    my ($self, $index) = @_;

    print STDERR "build_index should be implemented in subclass\n";
    return;
}
330
331
332
# Creates the collection's info database.
#
# Makes the text and assoc directories under the build directory, opens
# the info database for writing (or uses STDOUT when debugging), puts the
# buildproc into 'infodb' mode, runs all documents through the plugins,
# and finally writes the collection metadata, the classifier information
# and the "browselist" entry listing every document.
# Dies if the database write handle cannot be opened.
sub make_infodatabase {
    my ($self) = @_;
    my $outhandle  = $self->{'outhandle'};
    my $buildproc  = $self->{'buildproc'};
    my $infodbtype = $self->{'infodbtype'};
    my $gli        = $self->{'gli'};
    my $keepold    = $self->{'keepold'};

    print STDERR "BuildDir: $self->{'build_dir'}\n";

    my $textdir = &util::filename_cat($self->{'build_dir'}, "text");
    my $assocdir = &util::filename_cat($self->{'build_dir'}, "assoc");
    &util::mk_all_dir ($textdir);
    &util::mk_all_dir ($assocdir);

    # Get info database file path
    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $self->{'collection'}, $textdir);

    if ($self->{'verbosity'} >= 1) {
        print $outhandle "\n*** creating the info database and processing associated files\n";
    }
    print STDERR "<Stage name='CreateInfoData'>\n" if $gli;

    # init all the classifiers
    &classify::init_classifiers ($self->{'classifiers'});

    # when keeping the old build, reconstruct doc_obj metadata from the
    # database for all docs
    my $reconstructed_docs = undef;
    $reconstructed_docs = &classify::reconstruct_doc_objs_metadata($infodbtype, $infodb_file_path)
        if $keepold;

    # choose where the database records go
    my $infodb_handle;
    if ($self->{'debug'}) {
        $infodb_handle = *STDOUT;
    }
    else {
        $infodb_handle = &dbutil::open_infodb_write_handle($infodbtype, $infodb_file_path);
        unless (defined($infodb_handle)) {
            print STDERR "<FatalError name='NoRunText2DB'/>\n</Stage>\n" if $gli;
            die "builder::make_infodatabase - couldn't open infodb write handle\n";
        }
    }

    # set up the document processor
    $buildproc->set_infodbtype ($infodbtype);
    $buildproc->set_output_handle ($infodb_handle);
    $buildproc->set_mode ('infodb');
    $buildproc->set_assocdir ($assocdir);
    $buildproc->set_dontdb ($self->{'dontdb'});
    $buildproc->set_classifiers ($self->{'classifiers'});
    $buildproc->set_indexing_text (0);
    $buildproc->set_store_text(1);
    $buildproc->set_store_metadata_coverage ($self->{'collect_cfg'}->{'store_metadata_coverage'});

    # make_infodatabase needs full reset even for incremental build
    # as incremental works by reconstructing all docs from the database and
    # then adding in the new ones
    $buildproc->zero_reset();

    $buildproc->{'mdprefix_fields'} = {};

    if ($keepold) {
        # create flat classify structure, ready for new docs to be added
        foreach my $old_doc ( @$reconstructed_docs ) {
            print $outhandle " Adding reconstructed ", $old_doc->get_OID(), " into classify structures\n";
            $buildproc->process($old_doc,undef);
        }
    }

    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $buildproc, $self->{'maxdocs'},0, $gli);

    # this has changed to only output collection meta if its
    # not in the config file
    $self->output_collection_meta($infodb_handle);

    # output classification information
    &classify::output_classify_info ($self->{'classifiers'}, $infodbtype, $infodb_handle,
                                     $self->{'remove_empty_classifications'},
                                     $gli);

    # Output classifier reverse lookup, used in incremental deletion
    #&classify::print_reverse_lookup($infodb_handle);

    # output doclist
    my @doc_list = $buildproc->get_doc_list();
    my %browselist_infodb = (
        'hastxt'      => [ "0" ],
        'childtype'   => [ "VList" ],
        'numleafdocs' => [ scalar(@doc_list) ],
        'thistype'    => [ "Invisible" ],
        'contains'    => [ join(";", @doc_list) ],
    );
    &dbutil::write_infodb_entry($infodbtype, $infodb_handle, "browselist", \%browselist_infodb);

    # STDOUT (debug mode) is not a database handle, so it is left open
    &dbutil::close_infodb_write_handle($infodbtype, $infodb_handle) if !$self->{'debug'};

    print STDERR "</Stage>\n" if $gli;
}
428
# Writes the build configuration file for the finished build:
# build.cfg in gs2 mode, buildConfig.xml in gs3 mode.
#
# Starts from $self->{'build_cfg'} when a subclass has already put
# something there, then adds the build date and type, index stem,
# document/section/byte counts, the index, subcollection and language
# maps, the list of indexes that were not built, maxnumeric and
# infodbtype.  build_cfg_extra() is called last so that subclasses can
# add more fields.
sub make_auxiliary_files {
    my ($self) = @_;

    # subclasses may have already defined stuff in here
    my $build_cfg = defined $self->{'build_cfg'} ? $self->{'build_cfg'} : {};

    my $outhandle = $self->{'outhandle'};
    my $buildproc = $self->{'buildproc'};
    my $mapping   = $self->{'index_mapping'};

    print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Stage name='CreatingAuxilary'>\n" if $self->{'gli'};

    # make sure the build directory exists
    &util::mk_all_dir ($self->{'build_dir'});

    # store the build date
    $build_cfg->{'builddate'} = time;
    $build_cfg->{'buildtype'} = $self->{'buildtype'};
    $build_cfg->{'indexstem'} = &util::get_dirsep_tail($self->{'collection'});
    $build_cfg->{'stemindexes'} = $self->{'stemindexes'};
    $build_cfg->{'separate_cjk'} = "true" if $self->{'separate_cjk'};

    # store the number of documents and number of bytes
    $build_cfg->{'numdocs'} = $buildproc->get_num_docs();
    $build_cfg->{'numsections'} = $buildproc->get_num_sections();
    $build_cfg->{'numbytes'} = $buildproc->get_num_bytes();

    # store the mapping between the index names and the directory names
    # the index map is used to determine what indexes there are, so any
    # that are not built should not be put into the map.
    my @indexmap = ();
    foreach my $name (@{$self->{'index_mapping'}->{'indexmaporder'}}) {
        next if defined ($self->{'notbuilt'}->{$name});
        push (@indexmap, "$name\-\>$self->{'index_mapping'}->{'indexmap'}->{$name}");
    }
    $build_cfg->{'indexmap'} = \@indexmap if scalar (@indexmap);

    my @subcollectionmap = ();
    foreach my $name (@{$self->{'index_mapping'}->{'subcollectionmaporder'}}) {
        push (@subcollectionmap, "$name\-\>" .
              $self->{'index_mapping'}->{'subcollectionmap'}->{$name});
    }
    $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);

    my @languagemap = ();
    foreach my $name (@{$self->{'index_mapping'}->{'languagemaporder'}}) {
        push (@languagemap, "$name\-\>" .
              $self->{'index_mapping'}->{'languagemap'}->{$name});
    }
    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);

    # indexes that want_built() turned down
    my @notbuilt = keys %{$self->{'notbuilt'}};
    $build_cfg->{'notbuilt'} = \@notbuilt if scalar (@notbuilt);

    $build_cfg->{'maxnumeric'} = $self->{'maxnumeric'};

    $build_cfg->{'infodbtype'} = $self->{'infodbtype'};

    $self->build_cfg_extra($build_cfg);

    if ($gs_mode eq "gs2") {
        &colcfg::write_build_cfg("$self->{'build_dir'}/build.cfg", $build_cfg);
    }
    elsif ($gs_mode eq "gs3") {
        &colcfg::write_build_cfg_xml("$self->{'build_dir'}/buildConfig.xml", $build_cfg, $self->{'collect_cfg_preserve'}, $self->{'disable_OAI'});
    }

    print STDERR "</Stage>\n" if $self->{'gli'};
}
506
# Empty hook: does nothing in this base class.
sub collect_specific {
    my $self = shift;
}
510
# Decides whether $index should be built.
#
# Each entry of the collection's 'dontbuild' list is used as a pattern
# that must match the whole index name.  On the first match the index is
# recorded in $self->{'notbuilt'} and 0 is returned; otherwise returns 1.
sub want_built {
    my ($self, $index) = @_;

    my $dontbuild = $self->{'collect_cfg'}->{'dontbuild'};
    return 1 unless defined ($dontbuild);

    foreach my $checkstr (@$dontbuild) {
        next unless $index =~ /^$checkstr$/;

        $self->{'notbuilt'}->{$index} = 1;
        return 0;
    }

    return 1;
}
526
# Placeholder: the real mapping must be provided by the subclass.
# This base version reports that on STDERR and returns a reference to
# an empty hash.
sub create_index_mapping {
    my ($self, $indexes) = @_;

    print STDERR "create_index_mapping should be implemented in subclass\n";
    return {};
}
535
# returns a two character code derived from a field specification.
# if the field has two or more comma separated components, the code is
# the first letdig (\w) character of each of the first two components;
# otherwise it is the first two letdig characters of the field.
# a missing first character becomes 'a', a missing second one '0'.
# an undefined or all-whitespace field gives the empty string.
sub process_field {
    my ($self, $field) = @_;

    return "" unless (defined ($field) && $field =~ /\S/);

    my ($first, $second);
    my @components = split /,/, $field;
    if (scalar @components < 2) {
        # single component: pick the first two letdig chars
        ($first, $second) = $field =~ /^[^\w]*(\w)[^\w]*?(\w)/i;
    }
    else {
        # pick the first letdig from the first two field names
        ($first)  = $components[0] =~ /^[^\w]*(\w)/;
        ($second) = $components[1] =~ /^[^\w]*(\w)/;
    }

    # there may not have been any letdigs...
    $first  = 'a' unless defined $first;
    $second = '0' unless defined $second;

    return $first . $second;

}
565
# Advances the name referenced by $nameref, in place, to its next version:
#   - a name ending in two digits has those two digits incremented,
#   - a name ending in a single digit has that digit incremented
#     (9 becomes 10),
#   - any other name has its last character replaced by 0.
sub get_next_version {
    my ($self, $nameref) = @_;

    if ($$nameref =~ /(\d\d)$/) {
        # incrementing the captured string keeps a leading zero
        # ("07" becomes "08")
        my $next = $1;
        $next++;
        $$nameref =~ s/\d\d$/$next/;
    }
    elsif ($$nameref =~ /(\d)$/) {
        my $next = ($1 == 9) ? 10 : $1 + 1;
        $$nameref =~ s/\d$/$next/;
    }
    else {
        $$nameref =~ s/.$/0/;
    }
}
581
# implement this in subclass if want to add extra stuff to build.cfg
# ($build_cfg is the hash reference that make_auxiliary_files() is about
# to write out; this base version leaves it untouched)
sub build_cfg_extra {
    my $self = shift;
    my ($build_cfg) = @_;

}
588
589
# Copies the buildproc's 'mdprefix_fields' table into the hash referenced
# by $collection_infodb.  For every prefix it appends:
#   "metadataset"                    <- the prefix
#   "metadatalist-<prefix>"          <- each field recorded under it
#   "metadatafreq-<prefix>-<field>"  <- the value stored for that field
sub get_collection_meta_sets
{
    my ($self, $collection_infodb) = @_;

    my $mdprefix_fields = $self->{'buildproc'}->{'mdprefix_fields'};
    foreach my $prefix (keys %$mdprefix_fields)
    {
        push(@{$collection_infodb->{"metadataset"}}, $prefix);

        my $fields_of_prefix = $mdprefix_fields->{$prefix};
        foreach my $field (keys %{$fields_of_prefix})
        {
            push(@{$collection_infodb->{"metadatalist-$prefix"}}, $field);
            push(@{$collection_infodb->{"metadatafreq-$prefix-$field"}}, $fields_of_prefix->{$field});
        }
    }
}
609
610
# default is to output the metadata sets (prefixes) used in collection
# The sets are gathered with get_collection_meta_sets() and written to
# the info database under the key "collection".
sub output_collection_meta
{
    my ($self, $infodb_handle) = @_;

    my $collection_infodb = {};
    $self->get_collection_meta_sets($collection_infodb);
    return &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, "collection", $collection_infodb);
}
621
622
# Prints byte statistics for the pass the buildproc has just completed to
# the builder's output handle.
#
# When fewer than 50 bytes were processed (and the pass was expected to
# produce text) an extra note follows: for a keepold build, only if
# nothing at all was processed, a line saying nothing was added;
# otherwise a starred warning asking whether this was intended.
sub print_stats {
    my ($self) = @_;

    my $outhandle = $self->{'outhandle'};
    my $buildproc = $self->{'buildproc'};
    my $indexing_text = $buildproc->get_indexing_text();
    my $index = $buildproc->get_index();
    my $num_bytes = $buildproc->get_num_bytes();
    my $num_processed_bytes = $buildproc->get_num_processed_bytes();

    if ($indexing_text) {
        print $outhandle "Stats (Creating index $index)\n";
    }
    else {
        print $outhandle "Stats (Compressing text from $index)\n";
    }
    print $outhandle "Total bytes in collection: $num_bytes\n";
    print $outhandle "Total bytes in $index: $num_processed_bytes\n";

    # nothing more to say unless suspiciously little text was processed
    # in a pass that was supposed to handle text
    my $storing_text = !$self->{'no_text'};
    return unless ($num_processed_bytes < 50 && ($indexing_text || $storing_text));

    if ($self->{'keepold'}) {
        # keeping the old build: only an empty pass is worth a mention
        return unless ($num_processed_bytes == 0);

        if ($indexing_text) {
            print $outhandle "No additional text was added to $index\n";
        }
        elsif ($storing_text) {
            print $outhandle "No additional text was compressed\n";
        }
        return;
    }

    print $outhandle "***************\n";
    if ($indexing_text) {
        print $outhandle "WARNING: There is very little or no text to process for $index\n";
    }
    elsif ($storing_text) {
        print $outhandle "WARNING: There is very little or no text to compress\n";
    }
    print $outhandle " Was this your intention?\n";
    print $outhandle "***************\n";

}
665
666
6671;
668
Note: See TracBrowser for help on using the repository browser.