- Timestamp:
- 2000-07-13T10:21:53+12:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/New_Config_Format-branch/gsdl/perllib/mgbuilder.pm
r1072 r1279 99 99 100 100 # load all the plugins 101 $self->{'pluginfo'} = &plugin::load_plugins ($plugins );101 $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity); 102 102 if (scalar(@{$self->{'pluginfo'}}) == 0) { 103 103 print STDERR "No plugins were loaded.\n"; … … 207 207 close ($handle) unless $self->{'debug'}; 208 208 209 $self->print_stats(); 210 209 211 # create the compression dictionary 210 212 # the compression dictionary is built by assuming the stats are from a seed … … 232 234 "", {}, $self->{'buildproc'}, $self->{'maxdocs'}); 233 235 close ($handle) unless $self->{'debug'}; 236 237 $self->print_stats(); 234 238 } 235 239 … … 481 485 close ($handle) unless $self->{'debug'}; 482 486 487 $self->print_stats(); 488 483 489 if (!$self->{'debug'}) { 484 490 # create the perfect hash function … … 502 508 "", {}, $self->{'buildproc'}, $self->{'maxdocs'}); 503 509 510 $self->print_stats (); 511 504 512 if (!$self->{'debug'}) { 505 513 … … 646 654 $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes(); 647 655 656 # get additional stats from mg 657 my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}"; 658 my $exe = &util::get_os_exe (); 659 my $mgstat_exe = &util::filename_cat($exedir, "mgstat$exe"); 660 my $input_file = &util::filename_cat ("text", $self->{'collection'}); 661 if (!-e "$mgstat_exe" || !open (PIPEIN, "$mgstat_exe -d $self->{'build_dir'} -f $input_file |")) { 662 print STDERR "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n"; 663 } else { 664 my $line = ""; 665 while (defined ($line = <PIPEIN>)) { 666 if ($line =~ /^Words in collection \[dict\]\s+:\s+(\d+)/) { 667 ($build_cfg->{'numwords'}) = $1; 668 } elsif ($line =~ /^Documents\s+:\s+(\d+)/) { 669 ($build_cfg->{'numsections'}) = $1; 670 } 671 } 672 close PIPEIN; 673 } 674 648 675 # store the mapping between the index names and the directory names 649 676 my @indexmap = (); … … 667 694 $build_cfg->{'languagemap'} = \@languagemap if scalar 
(@languagemap); 668 695 669 $build_cfg->{'notbuilt'} = $self->{'notbuilt'} ;696 $build_cfg->{'notbuilt'} = $self->{'notbuilt'} if scalar @{$self->{'notbuilt'}}; 670 697 671 698 # write out the build information 672 699 &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg, 673 '^(builddate|numdocs|numbytes )$',700 '^(builddate|numdocs|numbytes|numwords|numsections)$', 674 701 '^(indexmap|subcollectionmap|languagemap|notbuilt)$'); 675 702 … … 680 707 } 681 708 709 sub print_stats { 710 my $self = shift (@_); 711 712 my $indexing_text = $self->{'buildproc'}->get_indexing_text(); 713 my $index = $self->{'buildproc'}->get_index(); 714 my $num_bytes = $self->{'buildproc'}->get_num_bytes(); 715 my $num_processed_bytes = $self->{'buildproc'}->get_num_processed_bytes(); 716 717 if ($indexing_text) { 718 print STDERR "Stats (Creating index $index)\n"; 719 } else { 720 print STDERR "Stats (Compressing text from $index)\n"; 721 } 722 print STDERR "Total bytes in collection: $num_bytes\n"; 723 print STDERR "Total bytes in $index: $num_processed_bytes\n"; 724 725 if ($num_processed_bytes < 50) { 726 print STDERR "***************\n"; 727 print STDERR "WARNING: There is very little or no text to process for $index\n"; 728 if ($indexing_text) { 729 print STDERR "This may cause an error while attempting to build the index\n"; 730 } else { 731 print STDERR "This may cause an error while attempting to compress the text\n"; 732 } 733 print STDERR "***************\n"; 734 } 735 } 682 736 683 737 1;
Note: See TracChangeset for help on using the changeset viewer.