Changeset 1424 for trunk/gsdl/perllib/mgbuilder.pm
- Timestamp: 2000-08-18T17:37:15+12:00 (24 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/mgbuilder.pm
r1304 r1424    (left column: line number in r1304; right column: line number in r1424; "-" = removed, "+" = added, "…" = unchanged lines omitted)
   61    61    sub new {
   62    62    my ($class, $collection, $source_dir, $build_dir, $verbosity,
   63        - $maxdocs, $debug, $keepold, $allclassifications) = @_;
         63  + $maxdocs, $debug, $keepold, $allclassifications, $outhandle) = @_;
         64  +
         65  + $outhandle = STDERR unless defined $outhandle;
   64    66
   65    67    # create an mgbuilder object
    …     …
   72    74    'keepold'=>$keepold,
   73    75    'allclassifications'=>$allclassifications,
         76  + 'outhandle'=>$outhandle,
   74    77    'notbuilt'=>[] # indexes not built
   75    78    }, $class;
    …     …
  112   115
  113   116    # load all the plugins
  114        - $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity);
        117  + $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity, $outhandle);
  115   118    if (scalar(@{$self->{'pluginfo'}}) == 0) {
  116        - print STDERR "No plugins were loaded.\n";
        119  + print $outhandle "No plugins were loaded.\n";
  117   120    die "\n";
  118   121    }
    …     …
  149   152
  150   153    eval("\$self->{'buildproc'} = new $buildproctype(\$collection, " .
  151        - "\$source_dir, \$build_dir, \$verbosity)");
        154  + "\$source_dir, \$build_dir, \$verbosity, \$outhandle)");
  152   155    die "$@" if $@;
  153   156
    …     …
  176   179    my $mg_passes_exe = &util::filename_cat($exedir, "mg_passes$exe");
  177   180    my $mg_compression_dict_exe = &util::filename_cat($exedir, "mg_compression_dict$exe");
        181  + my $outhandle = $self->{'outhandle'};
  178   182
  179   183    &util::mk_all_dir (&util::filename_cat($self->{'build_dir'}, "text"));
    …     …
  188   192    }
  189   193
  190        - print STDERR "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);
        194  + print $outhandle "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);
  191   195
  192   196    # collect the statistics for the text
  193   197    # -b $maxdocsize sets the maximum document size to be 12 meg
  194        - print STDERR "\n collecting text statistics\n" if ($self->{'verbosity'} >= 1);
        198  + print $outhandle "\n collecting text statistics\n" if ($self->{'verbosity'} >= 1);
  195   199
  196   200    my ($handle);
    …     …
  226   230    # words being put into the dictionary first (-2 -k 5120)
  227   231    if (!$self->{'debug'}) {
  228        - print STDERR "\n creating the compression dictionary\n" if ($self->{'verbosity'} >= 1);
        232  + print $outhandle "\n creating the compression dictionary\n" if ($self->{'verbosity'} >= 1);
  229   233    if (!-e "$mg_compression_dict_exe") {
  230   234    die "mgbuilder::compress_text - couldn't run $mg_compression_dict_exe\n";
    …     …
  241   245    $self->{'buildproc'}->reset();
  242   246    # compress the text
  243        - print STDERR "\n compressing the text\n" if ($self->{'verbosity'} >= 1);
        247  + print $outhandle "\n compressing the text\n" if ($self->{'verbosity'} >= 1);
  244   248    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
  245   249    "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
    …     …
  268   272    my $self = shift (@_);
  269   273    my ($indexname) = @_;
        274  + my $outhandle = $self->{'outhandle'};
  270   275
  271   276    my $indexes = [];
    …     …
  283   288    foreach $index (@$indexes) {
  284   289    if ($self->want_built($index)) {
  285        - print STDERR "\n*** building index $index in subdirectory " .
        290  + print $outhandle "\n*** building index $index in subdirectory " .
  286   291    "$self->{'index_mapping'}->{$index}\n" if ($self->{'verbosity'} >= 1);
  287   292    $self->build_index($index);
  288   293    } else {
  289        - print STDERR "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);
        294  + print $outhandle "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);
  290   295    }
  291   296    }
    …     …
  413   418    my $self = shift (@_);
  414   419    my ($index) = @_;
        420  + my $outhandle = $self->{'outhandle'};
  415   421
  416   422    # get the full index directory path and make sure it exists
    …     …
  472   478
  473   479    # Build index dictionary. Uses verbatim stem method
  474        - print STDERR "\n creating index dictionary\n" if ($self->{'verbosity'} >= 1);
        480  + print $outhandle "\n creating index dictionary\n" if ($self->{'verbosity'} >= 1);
  475   481    my ($handle);
  476   482    if ($self->{'debug'}) {
    …     …
  513   519
  514   520    # invert the text
  515        - print STDERR "\n inverting the text\n" if ($self->{'verbosity'} >= 1);
        521  + print $outhandle "\n inverting the text\n" if ($self->{'verbosity'} >= 1);
  516   522
  517   523    $self->{'buildproc'}->reset();
    …     …
  526   532
  527   533    # create the weights file
  528        - print STDERR "\n create the weights file\n" if ($self->{'verbosity'} >= 1);
        534  + print $outhandle "\n create the weights file\n" if ($self->{'verbosity'} >= 1);
  529   535    if (!-e "$mg_weights_build_exe") {
  530   536    die "mgbuilder::build_index - couldn't run $mg_weights_build_exe\n";
    …     …
  533   539
  534   540    # create 'on-disk' stemmed dictionary
  535        - print STDERR "\n creating 'on-disk' stemmed dictionary\n" if ($self->{'verbosity'} >= 1);
        541  + print $outhandle "\n creating 'on-disk' stemmed dictionary\n" if ($self->{'verbosity'} >= 1);
  536   542    if (!-e "$mg_invf_dict_exe") {
  537   543    die "mgbuilder::build_index - couldn't run $mg_invf_dict_exe\n";
    …     …
  541   547
  542   548    # creates stem index files for the various stemming methods
  543        - print STDERR "\n creating stem indexes\n" if ($self->{'verbosity'} >= 1);
        549  + print $outhandle "\n creating stem indexes\n" if ($self->{'verbosity'} >= 1);
  544   550    if (!-e "$mg_stem_idx_exe") {
  545   551    die "mgbuilder::build_index - couldn't run $mg_stem_idx_exe\n";
    …     …
  559   565    if (defined $suffix && !defined $wanted_index_files{$suffix}) {
  560   566    # delete it!
  561        - print STDERR "deleting $file\n" if $self->{'verbosity'} > 2;
        567  + print $outhandle "deleting $file\n" if $self->{'verbosity'} > 2;
  562   568    &util::rm (&util::filename_cat ($tmpdir, $file));
  563   569    }
    …     …
  569   575    sub make_infodatabase {
  570   576    my $self = shift (@_);
        577  + my $outhandle = $self->{'outhandle'};
        578  +
  571   579    my $textdir = &util::filename_cat($self->{'build_dir'}, "text");
  572   580    my $assocdir = &util::filename_cat($self->{'build_dir'}, "assoc");
    …     …
  584   592    my $txt2db_exe = &util::filename_cat($exedir, "txt2db$exe");
  585   593
  586        - print STDERR "\n*** creating the info database and processing associated files\n"
        594  + print $outhandle "\n*** creating the info database and processing associated files\n"
  587   595    if ($self->{'verbosity'} >= 1);
  588   596
    …     …
  624   632    $self->{'collect_cfg'}->{'collectionmeta'}->{".$cmeta"} . "\n";
  625   633    } else {
  626        - print STDERR "mgbuilder: warning bad collectionmeta option '$cmeta' - ignored\n";
        634  + print $outhandle "mgbuilder: warning bad collectionmeta option '$cmeta' - ignored\n";
  627   635    }
  628   636    } else {
    …     …
  652   660    my ($index);
  653   661    my %build_cfg = ();
  654        -
  655        - print STDERR "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
        662  + my $outhandle = $self->{'outhandle'};
        663  +
        664  + print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
  656   665
  657   666    # get the text directory
    …     …
  671   680    my $input_file = &util::filename_cat ("text", $self->{'collection'});
  672   681    if (!-e "$mgstat_exe" || !open (PIPEIN, "$mgstat_exe -d $self->{'build_dir'} -f $input_file |")) {
  673        - print STDERR "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n";
        682  + print $outhandle "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n";
  674   683    } else {
  675   684    my $line = "";
    …     …
  721   730    my $self = shift (@_);
  722   731
        732  + my $outhandle = $self->{'outhandle'};
  723   733    my $indexing_text = $self->{'buildproc'}->get_indexing_text();
  724   734    my $index = $self->{'buildproc'}->get_index();
    …     …
  727   737
  728   738    if ($indexing_text) {
  729        - print STDERR "Stats (Creating index $index)\n";
  730        - } else {
  731        - print STDERR "Stats (Compressing text from $index)\n";
  732        - }
  733        - print STDERR "Total bytes in collection: $num_bytes\n";
  734        - print STDERR "Total bytes in $index: $num_processed_bytes\n";
        739  + print $outhandle "Stats (Creating index $index)\n";
        740  + } else {
        741  + print $outhandle "Stats (Compressing text from $index)\n";
        742  + }
        743  + print $outhandle "Total bytes in collection: $num_bytes\n";
        744  + print $outhandle "Total bytes in $index: $num_processed_bytes\n";
  735   745
  736   746    if ($num_processed_bytes < 50) {
  737        - print STDERR "***************\n";
  738        - print STDERR "WARNING: There is very little or no text to process for $index\n";
        747  + print $outhandle "***************\n";
        748  + print $outhandle "WARNING: There is very little or no text to process for $index\n";
  739   749    if ($indexing_text) {
  740        - print STDERR "This may cause an error while attempting to build the index\n";
        750  + print $outhandle "This may cause an error while attempting to build the index\n";
  741   751    } else {
  742        - print STDERR "This may cause an error while attempting to compress the text\n";
  743        - }
  744        - print STDERR "***************\n";
        752  + print $outhandle "This may cause an error while attempting to compress the text\n";
        753  + }
        754  + print $outhandle "***************\n";
  745   755    }
  746   756    }
Note: See TracChangeset for help on using the changeset viewer.