source: gsdl/trunk/perllib/mgbuilder.pm@ 17110

Last change on this file since 17110 was 17110, checked in by kjdon, 16 years ago

Changed the way CJK separation is done: it is no longer done in the plugins, but is now an indexoption. cnseg is called from the filter_text method, and generate_index_options sets up the field in buildproc.

  • Property svn:keywords set to Author Date Id Revision
File size: 18.7 KB
RevLine 
[537]1###########################################################################
[4]2#
[537]3# mgbuilder.pm -- MGBuilder object
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package mgbuilder;
27
[10468]28use basebuilder;
[4]29use plugin;
[15716]30use strict; no strict 'refs';
[4]31use util;
32
[15716]33
[1304]34BEGIN {
[10468]35 @mgbuilder::ISA = ('basebuilder');
[1304]36}
37
38
# Suffixes of the files that are kept in an index directory. build_index
# deletes every file in that directory whose suffix is not listed here.
my %wanted_index_files = map { $_ => 1 }
    qw(td t idb ib1 ib2 ib3 i ip tiw wa);

# Value given to the -b option of every mg_passes run; taken from basebuilder.
my $maxdocsize = $basebuilder::maxdocsize;
[4]51
[10468]52
# Constructor. Builds the object with the basebuilder constructor (all
# arguments after the class name are passed through unchanged), re-blesses
# it into the requested class and records "mg" as the build type.
#
# Returns the new builder object.
sub new {
    my $class = shift(@_);

    # explicit class-method call rather than the indirect object form
    # "new basebuilder (...)", which perl can misparse
    my $self = basebuilder->new(@_);
    $self = bless $self, $class;

    $self->{'buildtype'} = "mg";
    return $self;
}
[4]62
# Returns the name of the build processor used when none is configured.
sub default_buildproc {
    my ($self) = @_;

    my $buildproc_name = "mgbuildproc";
    return $buildproc_name;
}
[4]68
# Makes sure the collection configuration has a non-empty list of indexes.
# A missing list is created, and an empty list is given the single entry
# "dummy:text".
sub generate_index_list {
    my ($self) = @_;

    $self->{'collect_cfg'}->{'indexes'} = []
        unless defined($self->{'collect_cfg'}->{'indexes'});

    my $index_list = $self->{'collect_cfg'}->{'indexes'};
    if (!@$index_list) {
        # no indexes have been specified so we'll build a "dummy:text" index
        push (@$index_list, "dummy:text");
    }
}
81
# Works out which index options are switched on and stores them on the
# object as 'casefold', 'stem', 'accentfold' and 'stemindexes'.
#
# The parent class method runs first. With no 'indexoptions' in the
# collection configuration both casefold and stem are enabled; otherwise
# an option is enabled when an entry of the list matches its name.
sub generate_index_options {
    my ($self) = @_;
    $self->SUPER::generate_index_options();

    # everything starts switched off; accentfold is not yet implemented for mg
    @{$self}{'casefold', 'stem', 'accentfold'} = (0, 0, 0);

    my $options = $self->{'collect_cfg'}->{'indexoptions'};
    if (defined($options)) {
        foreach my $opt (@$options) {
            if ($opt =~ /stem/) {
                $self->{'stem'} = 1;
            } elsif ($opt =~ /casefold/) {
                $self->{'casefold'} = 1;
            }
        }
    } else {
        # just use default options
        $self->{'casefold'} = 1;
        $self->{'stem'} = 1;
    }

    # now we record this for the build cfg:
    # casefold contributes 1 and stem contributes 2
    my $stemindexes = 0;
    $stemindexes += 1 if $self->{'casefold'};
    $stemindexes += 2 if $self->{'stem'};
    $self->{'stemindexes'} = $stemindexes;
}
116
# Builds the compressed text of the collection.
#
# The documents are read through the plugins twice. The first run feeds
# mg_passes -T1 to collect text statistics, mg_compression_dict then builds
# the compression dictionary, and the second run feeds mg_passes -T2 to
# write the compressed text. Statistics are printed after each run.
#
# Arguments:
#   $textindex - handed to the build processor's set_index()
#
# When $self->{'debug'} is set no MG program is run and the build processor
# writes to STDOUT instead.
#
# Dies if mg_passes or mg_compression_dict is missing, or if the pipe to
# mg_passes cannot be opened. Progress markup is written to STDERR when
# $self->{'gli'} is set.
sub compress_text {
    my $self = shift (@_);
    my ($textindex) = @_;

    my $outhandle = $self->{'outhandle'};
    my $maxnumeric = $self->{'maxnumeric'};

    # The full paths are only used to check that the programs exist and in
    # the error messages; the programs themselves are run by bare name.
    my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $exe = &util::get_os_exe ();
    my $mg_passes_exe = &util::filename_cat($exedir, "mg_passes$exe");
    my $mg_compression_dict_exe = &util::filename_cat($exedir, "mg_compression_dict$exe");

    &util::mk_all_dir (&util::filename_cat($self->{'build_dir'}, "text"));

    my $collect_tail = &util::get_dirsep_tail($self->{'collection'});
    my $basefilename = &util::filename_cat("text", $collect_tail);
    my $fulltextprefix = &util::filename_cat ($self->{'build_dir'}, $basefilename);

    # windows gets backslashes in the prefix, everything else gets "-d /"
    my $osextra = "";
    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
        $fulltextprefix =~ s@/@\\@g;
    } else {
        $osextra = " -d /";
    }

    print $outhandle "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Stage name='CompressText'>\n" if $self->{'gli'};

    # collect the statistics for the text
    # -b $maxdocsize sets the maximum document size
    print $outhandle "\n collecting text statistics\n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Phase name='CollectTextStats'/>\n" if $self->{'gli'};

    my ($handle);
    if ($self->{'debug'}) {
        $handle = *STDOUT;
    }
    else {
        if (!-e "$mg_passes_exe" ||
            !open($handle, "| mg_passes$exe -f \"$fulltextprefix\" -b $maxdocsize -T1 -M $maxnumeric $osextra")) {
            # self-closing tag, like every other FatalError this module prints
            print STDERR "<FatalError name='NoRunMGPasses'/>\n</Stage>\n" if $self->{'gli'};
            die "mgbuilder::compress_text - couldn't run $mg_passes_exe\n";
        }
    }

    # set up the document processor
    $self->{'buildproc'}->set_output_handle ($handle);
    $self->{'buildproc'}->set_mode ('text');
    $self->{'buildproc'}->set_index ($textindex);
    $self->{'buildproc'}->set_indexing_text (0);
    $self->{'buildproc'}->set_store_text ($self->{'no_text'} ? 0 : 1);
    $self->{'buildproc'}->reset();

    &plugin::begin($self->{'pluginfo'}, $self->{'source_dir'},
                   $self->{'buildproc'}, $self->{'maxdocs'});
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    &plugin::end($self->{'pluginfo'});

    close ($handle) unless $self->{'debug'};

    $self->print_stats();

    # create the compression dictionary
    # the compression dictionary is built by assuming the stats are from a seed
    # dictionary (-S), if a novel word is encountered it is spelled out (-H),
    # and the resulting dictionary must be less than 5 meg with the most frequent
    # words being put into the dictionary first (-2 -k 5120)
    if (!$self->{'debug'}) {
        print $outhandle "\n creating the compression dictionary\n" if ($self->{'verbosity'} >= 1);
        print STDERR "<Phase name='CreatingCompress'/>\n" if $self->{'gli'};
        if (!-e "$mg_compression_dict_exe") {
            die "mgbuilder::compress_text - couldn't run $mg_compression_dict_exe\n";
        }
        system ("mg_compression_dict$exe -f \"$fulltextprefix\" -S -H -2 -k 5120 $osextra");

        # -b $maxdocsize sets the maximum document size
        if (!-e "$mg_passes_exe" ||
            !open ($handle, "| mg_passes$exe -f \"$fulltextprefix\" -b $maxdocsize -T2 -M $maxnumeric $osextra")) {
            print STDERR "<FatalError name='NoRunMGPasses'/>\n</Stage>\n" if $self->{'gli'};
            die "mgbuilder::compress_text - couldn't run $mg_passes_exe\n";
        }
    }
    else {
        print STDERR "<Phase name='SkipCreatingComp'/>\n" if $self->{'gli'};
    }

    $self->{'buildproc'}->reset();
    # compress the text
    print $outhandle "\n compressing the text\n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Phase name='CompressingText'/>\n" if $self->{'gli'};

    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});

    close ($handle) unless $self->{'debug'};

    $self->print_stats();
    print STDERR "</Stage>\n" if $self->{'gli'};
}
222
[486]223
# Creates directory names for each of the index descriptions.
#
# Arguments:
#   $indexes - reference to a list of index descriptions of the form
#              level:granularity[:subcollection[:languages]]
#
# Returns a reference to a hash that maps each index description to its
# directory name. The hash also holds 'indexmap', 'subcollectionmap' and
# 'languagemap' (field => processed name), the matching '...maporder'
# lists recording the order in which the fields were first seen, and an
# entry for each "level:granularity", subcollection and language field.
#
# A description that repeats an earlier one is given the directory of the
# earlier one. Without that it would collide with its own directory name,
# make_unique would find nothing to change, and the collision loop would
# never end.
sub create_index_mapping {
    my $self = shift (@_);
    my ($indexes) = @_;

    my %mapping = ();
    $mapping{'indexmaporder'} = [];
    $mapping{'subcollectionmaporder'} = [];
    $mapping{'languagemaporder'} = [];

    # dirnames is used to check for collisions. Start this off
    # with the mandatory directory names
    my %dirnames = ('text'=>'text',
                    'extra'=>'extra');
    my %pnames = ('index' => {}, 'subcollection' => {}, 'languages' => {});

    # directory already allocated to each description, keyed by its four
    # fields with a missing field treated as empty
    my %dirname_of_fields = ();

    foreach my $index (@$indexes) {
        my ($level, $gran, $subcollection, $languages) = split (":", $index);

        # a repeated description reuses the directory it already has
        my $fields_key = join (":", map { defined $_ ? $_ : "" }
                               ($level, $gran, $subcollection, $languages));
        if (defined $dirname_of_fields{$fields_key}) {
            $mapping{$index} = $dirname_of_fields{$fields_key};
            next;
        }

        # the directory name starts with the first character of the index level
        my ($pindex) = $level =~ /^(.)/;

        # next comes a processed version of the index
        $pindex .= $self->process_field ($gran);
        $pindex = lc ($pindex);

        # next comes a processed version of the subcollection if there is one.
        my $psub = $self->process_field ($subcollection);
        $psub = lc ($psub);

        # next comes a processed version of the language if there is one.
        my $plang = $self->process_field ($languages);
        $plang = lc ($plang);

        my $dirname = $pindex . $psub . $plang;

        # check to be sure all index names are unique
        while (defined ($dirnames{$dirname})) {
            $dirname = $self->make_unique (\%pnames, $index, \$pindex, \$psub, \$plang);
        }
        $mapping{$index} = $dirname;

        # store the mapping orders as well as the maps
        # also put index, subcollection and language fields into the mapping thing -
        # (the full index name (eg document:text:subcol:lang) is not used on
        # the query page) -these are used for collectionmeta later on
        if (!defined $mapping{'indexmap'}{"$level:$gran"}) {
            $mapping{'indexmap'}{"$level:$gran"} = $pindex;
            push (@{$mapping{'indexmaporder'}}, "$level:$gran");
            if (!defined $mapping{"$level:$gran"}) {
                $mapping{"$level:$gran"} = $pindex;
            }
        }
        if ($psub =~ /\w/ && !defined ($mapping{'subcollectionmap'}{$subcollection})) {
            $mapping{'subcollectionmap'}{$subcollection} = $psub;
            push (@{$mapping{'subcollectionmaporder'}}, $subcollection);
            $mapping{$subcollection} = $psub;
        }
        if ($plang =~ /\w/ && !defined ($mapping{'languagemap'}{$languages})) {
            $mapping{'languagemap'}{$languages} = $plang;
            push (@{$mapping{'languagemaporder'}}, $languages);
            $mapping{$languages} = $plang;
        }

        # remember what has been allocated, for the collision checks
        $dirnames{$dirname} = $index;
        $dirname_of_fields{$fields_key} = $dirname;
        $pnames{'index'}->{$pindex} = "$level:$gran";
        $pnames{'subcollection'}->{$psub} = $subcollection;
        $pnames{'languages'}->{$plang} = $languages;
    }

    return \%mapping;
}
294
295
# Changes one part of a colliding directory name and returns the new name.
#
# Arguments:
#   $namehash  - hash with 'index', 'subcollection' and 'languages' tables,
#                each mapping a processed name to the field it was made from
#   $index     - index description, level:granularity[:subcollection[:languages]]
#   $indexref, $subref, $langref
#              - references to the processed index, subcollection and
#                language names; one of them may be modified in place
#
# The parts are checked in the order index, subcollection, languages. The
# first one whose processed name is recorded for a different field is
# passed to get_next_version, and the others are left alone. The return
# value is the three processed names joined together.
sub make_unique {
    my ($self, $namehash, $index, $indexref, $subref, $langref) = @_;
    my ($level, $gran, $subcollection, $languages) = split (":", $index);

    # table name, reference to the processed name, field of this index
    my @parts = (
        ['index',         $indexref, "$level:$gran"],
        ['subcollection', $subref,   $subcollection],
        ['languages',     $langref,  $languages],
    );

    foreach my $part (@parts) {
        my ($table, $nameref, $field) = @$part;
        if ($namehash->{$table}->{$$nameref} ne $field) {
            $self->get_next_version ($nameref);
            last;
        }
    }

    return $$indexref . $$subref . $$langref;
}
310
# Builds one MG index in the directory that index_mapping gives for it.
#
# The documents are read through the plugins twice. The first run feeds
# mg_passes -N1 to create the index dictionary, and the second feeds
# mg_passes -N2 to invert the text, with mg_perf_hash_build run in between.
# Afterwards mg_weights_build, mg_invf_dict and mg_stem_idx are run, and
# files with unwanted suffixes are removed from the index directory.
#
# Arguments:
#   $index - index description, level:fields[:subcollections[:languages]];
#            subcollections and languages are comma-separated lists
#
# When $self->{'debug'} is set no MG program is run and the build processor
# writes to STDOUT instead.
#
# If the index dictionary (.id file) is not there after the first run, the
# index is recorded in $self->{'notbuilt'} and the method returns without
# building the rest. Dies if an MG program is missing or a pipe cannot be
# opened. Progress markup is written to STDERR when $self->{'gli'} is set,
# and every way out of this method ends it with "</Stage>".
sub build_index {
    my $self = shift (@_);
    my ($index) = @_;
    my $outhandle = $self->{'outhandle'};

    # get the full index directory path and make sure it exists
    my $indexdir = $self->{'index_mapping'}->{$index};
    &util::mk_all_dir (&util::filename_cat($self->{'build_dir'}, $indexdir));

    my $collect_tail = &util::get_dirsep_tail($self->{'collection'});
    my $fullindexprefix = &util::filename_cat ($self->{'build_dir'}, $indexdir,
                                               $collect_tail);
    my $fulltextprefix = &util::filename_cat ($self->{'build_dir'}, "text",
                                              $collect_tail);

    # get any os specific stuff
    # (the full paths are only used to check that the programs exist and in
    # the error messages; the programs themselves are run by bare name)
    my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $exe = &util::get_os_exe ();
    my $mg_passes_exe = &util::filename_cat($exedir, "mg_passes$exe");
    my $mg_perf_hash_build_exe =
        &util::filename_cat($exedir, "mg_perf_hash_build$exe");
    my $mg_weights_build_exe =
        &util::filename_cat ($exedir, "mg_weights_build$exe");
    my $mg_invf_dict_exe =
        &util::filename_cat ($exedir, "mg_invf_dict$exe");
    my $mg_stem_idx_exe =
        &util::filename_cat ($exedir, "mg_stem_idx$exe");

    my $maxnumeric = $self->{'maxnumeric'};

    my $osextra = "";
    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
        $fullindexprefix =~ s@/@\\@g;
    } else {
        $osextra = " -d /";
        if ($outhandle ne "STDERR") {
            # so mg_passes doesn't print to stderr if we redirect output
            $osextra .= " 2>/dev/null";
        }
    }

    # get the index level from the index description
    # the index will be level 2 unless we are building a
    # paragraph level index
    my $index_level = 2;
    $index_level = 3 if $index =~ /^paragraph/i;

    # there may be subcollection info, and language info.
    my (undef, undef, $subcollection, $language) = split (":", $index);

    # get the index expressions if this index belongs to subcollections;
    # subcollections that are not in the collection configuration are skipped
    my $indexexparr = [];
    my @subcollections = ();
    @subcollections = split /,/, $subcollection if (defined $subcollection);
    foreach my $subcol (@subcollections) {
        if (defined ($self->{'collect_cfg'}->{'subcollection'}->{$subcol})) {
            push (@$indexexparr, $self->{'collect_cfg'}->{'subcollection'}->{$subcol});
        }
    }

    # add expressions for languages if this index belongs to
    # a language subcollection - only put languages expressions for the
    # ones we want in the index
    my $language_metadata = "Language";
    if (defined ($self->{'collect_cfg'}->{'language_metadata'})) {
        $language_metadata = $self->{'collect_cfg'}->{'language_metadata'};
    }
    # each entry is passed on as written, including any leading "!"
    my $langarr = [];
    @$langarr = split /,/, $language if (defined $language);

    # Build index dictionary. Uses verbatim stem method
    print $outhandle "\n creating index dictionary\n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Phase name='CreatingIndexDic'/>\n" if $self->{'gli'};
    my ($handle);
    if ($self->{'debug'}) {
        $handle = *STDOUT;
    }
    else {
        if (!-e "$mg_passes_exe" ||
            !open($handle, "| mg_passes$exe -f \"$fullindexprefix\" -b $maxdocsize " .
                  "-$index_level -m 32 -s 0 -G -t 10 -N1 -M $maxnumeric $osextra")) {
            print STDERR "<FatalError name='NoRunMGPasses'/>\n</Stage>\n" if $self->{'gli'};
            die "mgbuilder::build_index - couldn't run $mg_passes_exe\n";
        }
    }

    # set up the document processor
    $self->{'buildproc'}->set_output_handle ($handle);
    $self->{'buildproc'}->set_mode ('text');
    $self->{'buildproc'}->set_index ($index, $indexexparr);
    $self->{'buildproc'}->set_index_languages ($language_metadata, $langarr) if (defined $language);
    $self->{'buildproc'}->set_indexing_text (1);
    $self->{'buildproc'}->set_store_text(1);

    $self->{'buildproc'}->reset();
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});
    close ($handle) unless $self->{'debug'};

    $self->print_stats();

    # now we check to see if the required files have been produced - if not
    # we quit building this index so the whole process doesn't fall over.
    # we check on the .id file - index dictionary
    my $dict_file = "$fullindexprefix.id";
    if (!-e $dict_file) {
        print $outhandle "mgbuilder::build_index - Couldn't create index $index\n";
        $self->{'notbuilt'}->{$index}=1;
        # end the stage here too, as every other way out of this method does
        print STDERR "</Stage>\n" if $self->{'gli'};
        return;
    }
    if (!$self->{'debug'}) {
        # create the perfect hash function
        if (!-e "$mg_perf_hash_build_exe") {
            print STDERR "<FatalError name='NoRunMGHash'/>\n</Stage>\n" if $self->{'gli'};
            die "mgbuilder::build_index - couldn't run $mg_perf_hash_build_exe\n";
        }
        system ("mg_perf_hash_build$exe -f \"$fullindexprefix\" $osextra");

        if (!-e "$mg_passes_exe" ||
            !open ($handle, "| mg_passes$exe -f \"$fullindexprefix\" -b $maxdocsize " .
                   "-$index_level -c 3 -G -t 10 -N2 -M $maxnumeric $osextra")) {
            print STDERR "<FatalError name='NoRunMGPasses'/>\n</Stage>\n" if $self->{'gli'};
            die "mgbuilder::build_index - couldn't run $mg_passes_exe\n";
        }
    }

    # invert the text
    print $outhandle "\n inverting the text\n" if ($self->{'verbosity'} >= 1);
    print STDERR "<Phase name='InvertingText'/>\n" if $self->{'gli'};
    $self->{'buildproc'}->reset();
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});

    $self->print_stats ();

    if (!$self->{'debug'}) {

        close ($handle);

        # create the weights file
        print $outhandle "\n create the weights file\n" if ($self->{'verbosity'} >= 1);
        print STDERR "<Phase name='CreateTheWeights'/>\n" if $self->{'gli'};
        if (!-e "$mg_weights_build_exe") {
            print STDERR "<FatalError name='NoRunMGWeights'/>\n</Stage>\n" if $self->{'gli'};
            die "mgbuilder::build_index - couldn't run $mg_weights_build_exe\n";
        }
        system ("mg_weights_build$exe -f \"$fullindexprefix\" -t \"$fulltextprefix\" $osextra");

        # create 'on-disk' stemmed dictionary
        print $outhandle "\n creating 'on-disk' stemmed dictionary\n" if ($self->{'verbosity'} >= 1);
        print STDERR "<Phase name='CreateStemmedDic'/>\n" if $self->{'gli'};
        if (!-e "$mg_invf_dict_exe") {
            print STDERR "<FatalError name='NoRunMGInvf'/>\n</Stage>\n" if $self->{'gli'};
            die "mgbuilder::build_index - couldn't run $mg_invf_dict_exe\n";
        }
        system ("mg_invf_dict$exe -f \"$fullindexprefix\" $osextra");

        # creates stem index files for the various stemming methods
        print $outhandle "\n creating stem indexes\n" if ($self->{'verbosity'} >= 1);
        print STDERR "<Phase name='CreatingStemIndx'/>\n" if $self->{'gli'};
        if (!-e "$mg_stem_idx_exe") {
            print STDERR "<FatalError name='NoRunMGStem'/>\n</Stage>\n" if $self->{'gli'};
            die "mgbuilder::build_index - couldn't run $mg_stem_idx_exe\n";
        }
        # currently mg won't work if we don't generate all the stem indexes
        # so we generate them whatever, but don't advertise the fact
        #if ($self->{'casefold'}) {
        system ("mg_stem_idx$exe -b 4096 -s1 -f \"$fullindexprefix\" $osextra");
        #}
        #if ($self->{'stem'}) {
        system ("mg_stem_idx$exe -b 4096 -s2 -f \"$fullindexprefix\" $osextra");
        #}
        #if ($self->{'casefold'} && $self->{'stem'}) {
        system ("mg_stem_idx$exe -b 4096 -s3 -f \"$fullindexprefix\" $osextra");
        #}

        # remove unwanted files: anything with a suffix that is not in
        # %wanted_index_files (files without a suffix and dot files are kept)
        my $tmpdir = &util::filename_cat ($self->{'build_dir'}, $indexdir);
        opendir (my $dirhandle, $tmpdir) or die
            "mgbuilder::build_index - couldn't read directory $tmpdir\n";
        foreach my $file (readdir($dirhandle)) {
            next if $file =~ /^\./;
            my ($suffix) = $file =~ /\.([^\.]+)$/;
            if (defined $suffix && !defined $wanted_index_files{$suffix}) {
                # delete it!
                print $outhandle "deleting $file\n" if $self->{'verbosity'} > 2;
                &util::rm (&util::filename_cat ($tmpdir, $file));
            }
        }
        closedir ($dirhandle);
    }
    print STDERR "</Stage>\n" if $self->{'gli'};
}
519
# Adds statistics reported by the mgstat program to the build configuration.
#
# Arguments:
#   $build_cfg - hash reference; 'numwords' is set from mgstat's
#                "Words in collection [dict]" line and 'numsections' from
#                its "Documents" line
#
# If mgstat is missing or the pipe cannot be opened, a warning is printed
# to the builder's output handle and $build_cfg is left untouched.
sub build_cfg_extra {
    my $self = shift(@_);
    my ($build_cfg) = @_;

    # get additional stats from mg
    # (the full path is only used for the existence check and the warning;
    # mgstat itself is run by bare name)
    my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $exe = &util::get_os_exe ();
    my $mgstat_exe = &util::filename_cat($exedir, "mgstat$exe");

    my $collect_tail = &util::get_dirsep_tail($self->{'collection'});
    my $input_file = &util::filename_cat ("text", $collect_tail);

    # lexical handle, so no package-wide filehandle is left behind
    my $pipein;
    if (!-e "$mgstat_exe" || !open ($pipein, "mgstat$exe -d \"$self->{'build_dir'}\" -f \"$input_file\" |")) {
        my $outhandle = $self->{'outhandle'};
        print $outhandle "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n";
    } else {
        while (defined (my $line = <$pipein>)) {
            if ($line =~ /^Words in collection \[dict\]\s+:\s+(\d+)/) {
                $build_cfg->{'numwords'} = $1;
            } elsif ($line =~ /^Documents\s+:\s+(\d+)/) {
                $build_cfg->{'numsections'} = $1;
            }
        }
        close ($pipein);
    }
}
546
5471;
[7904]548
549
550
Note: See TracBrowser for help on using the repository browser.