Changeset 1251 for trunk/gsdl/perllib/mgbuildproc.pm
- Timestamp:
- 2000-06-29T10:34:25+12:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/mgbuildproc.pm
r1072  r1251
   58     58  $self->{'num_sections'} = 0;
   59     59  $self->{'num_bytes'} = 0;
          60  $self->{'num_processed_bytes'} = 0;
   60     61
   61     62  $self->{'indexing_text'} = 0;
    …      …
   69     70  $self->{'num_docs'} = 0;
   70     71  $self->{'num_sections'} = 0;
          72  $self->{'num_processed_bytes'} = 0;
   71     73  $self->{'num_bytes'} = 0;
   72     74  }
    …      …
   84     86  }
   85     87
          88  # num_bytes is the actual number of bytes in the collection
          89  # this is normally the same as what's processed during text compression
   86     90  sub get_num_bytes {
   87     91  my $self = shift (@_);
   88     92
   89     93  return $self->{'num_bytes'};
          94  }
          95
          96  # num_processed_bytes is the number of bytes actually passed
          97  # to mg for the current index
          98  sub get_num_processed_bytes {
          99  my $self = shift (@_);
         100
         101  return $self->{'num_processed_bytes'};
   90    102  }
   91    103
    …      …
  126    138  }
  127    139
         140  sub get_index {
         141  my $self = shift (@_);
         142
         143  return $self->{'index'};
         144  }
         145
  128    146  sub set_classifiers {
  129    147  my $self = shift (@_);
    …      …
  138    156
  139    157  $self->{'indexing_text'} = $indexing_text;
         158  }
         159
         160  sub get_indexing_text {
         161  my $self = shift (@_);
         162
         163  return $self->{'indexing_text'};
  140    164  }
  141    165
    …      …
  416    440  if ($real_field eq "text") {
  417    441  $new_text = $doc_obj->get_text ($section);
         442  $self->{'num_processed_bytes'} += length ($new_text);
  418    443  $new_text =~ s/[\cB\cC]//g;
  419    444  $self->find_paragraphs($new_text);
    …      …
  423    448  foreach $meta (@{$doc_obj->get_metadata ($section, $real_field)}) {
  424    449  $meta =~ s/[\cB\cC]//g;
         450  $self->{'num_processed_bytes'} += length ($meta);
  425    451  $new_text .= "\cC" unless $first;
  426    452  $new_text .= $meta;
Note:
See TracChangeset
for help on using the changeset viewer.