source: trunk/gsdl/perllib/mgbuilder.pm@ 593

Last change on this file since 593 was 537, checked in by sjboddie, 25 years ago

added GPL headers

  • Property svn:keywords set to Author Date Id Revision
File size: 21.9 KB
Line 
1###########################################################################
2#
3# mgbuilder.pm -- MGBuilder object
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package mgbuilder;
27
28use classify;
29use cfgread;
30use colcfg;
31use plugin;
32use util;
33
# Value handed to mg_passes through its -b option (maximum document size).
$maxdocsize = 12000;

# Suffixes of the files that are kept in an index directory once the index
# has been built; build_index removes files with any other suffix.
%wanted_index_files = map { $_ => 1 }
    qw(td t idb ib1 ib2 ib3 i ip tiw wa);
46
47
# Construct an mgbuilder object for one collection.
#
# Arguments (positional):
#   $class              - class to bless the object into
#   $collection         - collection name; also used to look for a
#                         collection specific "<collection>buildproc.pm"
#   $source_dir         - stored and later passed to plugin::read
#   $build_dir          - directory the build is written into
#   $verbosity          - progress messages are printed when this is >= 1
#   $maxdocs            - stored and later passed to plugin::read
#   $allclassifications - stored and later passed to
#                         classify::output_classify_info
#
# Reads $GSDLCOLLECTDIR/etc/collect.cfg, expands the configured indexes by
# subcollection and by language, then loads the plugins, the classifiers and
# the document processor.
#
# Dies when collect.cfg does not exist, when no plugins were loaded, or when
# the document processor cannot be required or constructed.
# Returns the new object.
sub new {
    my ($class, $collection, $source_dir, $build_dir,
        $verbosity, $maxdocs, $allclassifications) = @_;

    # create an mgbuilder object
    my $self = bless {'collection'=>$collection,
                      'source_dir'=>$source_dir,
                      'build_dir'=>$build_dir,
                      'verbosity'=>$verbosity,
                      'maxdocs'=>$maxdocs,
                      'allclassifications'=>$allclassifications,
                      'notbuilt'=>[]       # indexes not built
                      }, $class;

    # read in the collection configuration file
    my $colcfgname = "$ENV{'GSDLCOLLECTDIR'}/etc/collect.cfg";
    if (!-e $colcfgname) {
        die "mgbuilder::new - couldn't find collect.cfg for collection $collection\n";
    }
    $self->{'collect_cfg'} = &colcfg::read_collect_cfg ($colcfgname);

    # sort out subcollection indexes: every index is repeated once per
    # subcollection as "index:subcollection"
    if (defined $self->{'collect_cfg'}->{'indexsubcollections'}) {
        my $indexes = $self->{'collect_cfg'}->{'indexes'};
        $self->{'collect_cfg'}->{'indexes'} = [];
        foreach my $subcollection (@{$self->{'collect_cfg'}->{'indexsubcollections'}}) {
            foreach my $index (@$indexes) {
                push (@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$subcollection");
            }
        }
    }

    # sort out language subindexes: every index (already expanded by
    # subcollection, if any) is repeated once per language as "index:language"
    # NOTE(review): when no indexsubcollections are configured the language
    # ends up as the third ':' separated field - confirm that the code which
    # splits index descriptions expects this.
    if (defined $self->{'collect_cfg'}->{'languages'}) {
        my $indexes = $self->{'collect_cfg'}->{'indexes'};
        $self->{'collect_cfg'}->{'indexes'} = [];
        foreach my $language (@{$self->{'collect_cfg'}->{'languages'}}) {
            foreach my $index (@$indexes) {
                push (@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$language");
            }
        }
    }

    # get the list of plugins for this collection
    my @plugins = (); # some good choice of plugins .... ????
    if (defined $self->{'collect_cfg'}->{'plugins'}) {
        @plugins = @{$self->{'collect_cfg'}->{'plugins'}};
    }

    # load all the plugins
    $self->{'pluginfo'} = &plugin::load_plugins ($collection, \@plugins);
    if (scalar(@{$self->{'pluginfo'}}) == 0) {
        print STDERR "No plugins were loaded.\n";
        die "\n";
    }

    # load all the classifiers
    # The bareword handle is kept because cfgread::read_cfg_line is given the
    # handle by name; the three argument open stops the file name from being
    # interpreted as an open mode or a pipe.
    $self->{'classifiers'} = [];
    if (open (COLCFG, '<', $colcfgname)) {
        while (defined (my $line = &cfgread::read_cfg_line('mgbuilder::COLCFG'))) {
            if (scalar(@$line) >= 2) {
                my $key = shift (@$line);
                if ($key eq "classify") {
                    my $classinfo = &classify::load_classifier($line);
                    push (@{$self->{'classifiers'}}, $classinfo)
                        if defined $classinfo;
                }
            }
        }
        close (COLCFG);
    } else {
        print STDERR "mgbuilder::new couldn't read the cfg file $colcfgname\n";
        print STDERR "               no classifiers were loaded\n";
    }

    # set the classifytype to use for displaying documents - if the doctype field hasn't
    # been set in the collect.cfg then the receptionist currently defaults to displaying
    # documents as 'Book'
    if (open (COLCFG, '<', $colcfgname)) {
        while (defined (my $line = &cfgread::read_cfg_line('mgbuilder::COLCFG'))) {
            if (scalar(@$line) == 2) {
                my $key = shift (@$line);
                if ($key eq "doctype") {
                    $self->{'classifytype'} = shift (@$line);
                }
            }
        }
        close (COLCFG);
    }

    # load up the document processor for building
    # if a buildproc class has been created for this collection, use it
    # otherwise, use the mg buildproc
    my ($buildprocdir, $buildproctype);
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}buildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "${collection}buildproc";
    } else {
        $buildprocdir = "$ENV{'GSDLHOME'}/perllib";
        $buildproctype = "mgbuildproc";
    }
    require "$buildprocdir/$buildproctype.pm";

    # call the constructor through the class name held in $buildproctype;
    # this needs no string eval, so the collection name is never compiled
    # as perl code
    $self->{'buildproc'} = $buildproctype->new ($collection, $source_dir,
                                                $build_dir, $verbosity);

    return $self;
}
160
# Prepare the build directory for a fresh build: the old build_dir is
# removed, build_dir is created again and a "text" directory is made in it.
sub init {
    my ($self) = @_;
    my $build_dir = $self->{'build_dir'};

    # throw away whatever an earlier build left behind
    &util::rm_r($build_dir);
    &util::mk_all_dir($build_dir);

    # the text directory sits directly below the build directory
    &util::mk_all_dir("$build_dir/text");
}
172
# Create the compressed text of the collection under <build_dir>/text.
#
# $textindex is handed to the document processor with set_index.
#
# The documents are piped through mg_passes twice (-T1 to collect text
# statistics, -T2 to compress the text); mg_compression_dict is run between
# the two passes. Dies when one of the executables does not exist or a pipe
# to mg_passes cannot be opened.
sub compress_text {
    my ($self, $textindex) = @_;
    my $verbose = ($self->{'verbosity'} >= 1);

    # locate the executables for this platform
    my $bindir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $suffix = &util::get_os_exe ();
    my $passes_cmd = &util::filename_cat($bindir, "mg_passes$suffix");
    my $dict_cmd = &util::filename_cat($bindir, "mg_compression_dict$suffix");

    # every output file shares the prefix <build_dir>/text/<collection>
    &util::mk_all_dir (&util::filename_cat($self->{'build_dir'}, "text"));
    my $textprefix = &util::filename_cat ($self->{'build_dir'},
                                          "text/$self->{'collection'}");

    # on windows the prefix gets backslashes, everywhere else the
    # executables are given " -d /"
    my $osextra = " -d /";
    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
        $textprefix =~ s/\//\\/g;
        $osextra = "";
    }

    print STDERR "\n*** creating the compressed text\n" if $verbose;

    # the document processor writes text (not indexing text) to PIPEOUT
    my $buildproc = $self->{'buildproc'};
    $buildproc->set_output_handle ('mgbuilder::PIPEOUT');
    $buildproc->set_mode ('text');
    $buildproc->set_index ($textindex);
    $buildproc->set_indexing_text (0);

    # first pass: text statistics (-b $maxdocsize limits the document size)
    print STDERR "\n collecting text statistics\n" if $verbose;
    unless (-e "$passes_cmd" && open (PIPEOUT,
            "| $passes_cmd -f $textprefix -b $maxdocsize -T1 $osextra")) {
        die "mgbuilder::compress_text - couldn't run $passes_cmd\n";
    }
    $buildproc->reset();
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, $buildproc, $self->{'maxdocs'});
    close (PIPEOUT);

    # compression dictionary: the statistics are treated as coming from a
    # seed dictionary (-S), novel words are spelled out (-H), and the most
    # frequent words go into the dictionary first until the size limit is
    # reached (-2 -k 5120)
    print STDERR "\n creating the compression dictionary\n" if $verbose;
    unless (-e "$dict_cmd") {
        die "mgbuilder::compress_text - couldn't run $dict_cmd\n";
    }
    system ("$dict_cmd -f $textprefix -S -H -2 -k 5120 $osextra");

    # second pass: compress the text with the dictionary just built
    print STDERR "\n compressing the text\n" if $verbose;
    unless (-e "$passes_cmd" && open (PIPEOUT,
            "| $passes_cmd -f $textprefix -b $maxdocsize -T2 $osextra")) {
        die "mgbuilder::compress_text - couldn't run $passes_cmd\n";
    }
    $buildproc->reset();
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, $buildproc, $self->{'maxdocs'});
    close (PIPEOUT);
}
235
# Decide whether an index should be built.
#
# $index is an index description. Each entry of the collect.cfg "dontbuild"
# list is used as a pattern that has to match the whole description. On the
# first match the directory name mapped to the index is recorded in
# $self->{'notbuilt'} and 0 is returned; otherwise 1 is returned.
sub want_built {
    my ($self, $index) = @_;

    my $patterns = $self->{'collect_cfg'}->{'dontbuild'};
    return 1 unless defined ($patterns);

    foreach my $pattern (@$patterns) {
        next unless $index =~ /^$pattern$/;

        # remember the directory of the index that is being skipped
        push (@{$self->{'notbuilt'}}, $self->{'index_mapping'}->{$index});
        return 0;
    }

    return 1;
}
251
# Build every index listed in the collection configuration.
# The mapping from index description to directory name is created first and
# stored in $self->{'index_mapping'}; indexes rejected by want_built are
# skipped.
sub build_indexes {
    my ($self) = @_;
    my $indexes = $self->{'collect_cfg'}->{'indexes'};
    my $verbose = ($self->{'verbosity'} >= 1);

    # work out the directory name of each index description
    my $mapping = $self->create_index_mapping ($indexes);
    $self->{'index_mapping'} = $mapping;

    foreach my $index (@$indexes) {
        unless ($self->want_built($index)) {
            print STDERR "\n*** ignoring index $index\n" if $verbose;
            next;
        }

        print STDERR "\n*** building index $index in subdirectory " .
            "$mapping->{$index}\n" if $verbose;
        $self->build_index($index);
    }
}
271
# Creates directory names for each of the index descriptions.
#
# $indexes is a reference to a list of index descriptions of the form
# "level:granularity[:subcollection[:languages]]".
#
# Returns a reference to a hash holding
#   <index description>       => directory name of that index
#   'indexmap'                => { "level:granularity" => short name }
#   'subcollectionmap'        => { subcollection       => short name }
#   'languagemap'             => { languages           => short name }
#   'indexmaporder', 'subcollectionmaporder', 'languagemaporder'
#                             => the keys of the maps above, in the order
#                                in which they were first seen
sub create_index_mapping {
    my $self = shift (@_);
    my ($indexes) = @_;

    my %mapping = ();
    $mapping{'indexmaporder'} = [];
    $mapping{'subcollectionmaporder'} = [];
    $mapping{'languagemaporder'} = [];

    # dirnames is used to check for collisions. Start this off
    # with the mandatory directory names
    my %dirnames = ('text'=>'text',
                    'extra'=>'extra');

    # short name => full name, kept separately for each part of the
    # directory name. These have to be three distinct hash references: a
    # plain string here would be dereferenced symbolically, so that all three
    # tables would end up being one and the same package hash.
    my %pnames = ('index' => {}, 'subcollection' => {}, 'languages' => {});

    foreach my $index (@$indexes) {
        my ($level, $gran, $subcollection, $languages) = split (":", $index);

        # the directory name starts with the first character of the index level
        my ($pindex) = $level =~ /^(.)/;

        # next comes a processed version of the index
        $pindex .= $self->process_field ($gran);
        $pindex = lc ($pindex);

        # next comes a processed version of the subcollection if there is one.
        my $psub = $self->process_field ($subcollection);
        $psub = lc ($psub);

        # next comes a processed version of the language if there is one.
        my $plang = $self->process_field ($languages);
        $plang = lc ($plang);

        my $dirname = $pindex . $psub . $plang;

        # check to be sure all index names are unique
        while (defined ($dirnames{$dirname})) {
            $dirname = $self->make_unique (\%pnames, $index, \$pindex, \$psub, \$plang);
        }

        # store the mapping orders as well as the maps
        if (!defined $mapping{'indexmap'}{"$level:$gran"}) {
            $mapping{'indexmap'}{"$level:$gran"} = $pindex;
            push (@{$mapping{'indexmaporder'}}, "$level:$gran");
        }
        if ($psub =~ /\w/ && !defined ($mapping{'subcollectionmap'}{$subcollection})) {
            $mapping{'subcollectionmap'}{$subcollection} = $psub;
            push (@{$mapping{'subcollectionmaporder'}}, $subcollection);
        }
        if ($plang =~ /\w/ && !defined ($mapping{'languagemap'}{$languages})) {
            $mapping{'languagemap'}{$languages} = $plang;
            # the order list has to hold the same key as languagemap
            push (@{$mapping{'languagemaporder'}}, $languages);
        }
        $mapping{$index} = $dirname;
        $dirnames{$dirname} = $index;
        $pnames{'index'}{$pindex} = "$level:$gran";
        $pnames{'subcollection'}{$psub} = $subcollection;
        $pnames{'languages'}{$plang} = $languages;
    }

    return \%mapping;
}
335
# Returns the short form of a field.
#   - an undefined field, or one without any word character, gives ""
#   - a field with two or more comma separated components gives the first
#     character of each of its first two components
#   - a field with one component gives its first character followed by the
#     next consonant, or its first two characters when no consonant follows
sub process_field {
    my ($self, $field) = @_;

    if (!defined ($field) || $field !~ /\w/) {
        return "";
    }

    my @parts = split /,/, $field;

    if (scalar (@parts) < 2) {
        # single component: first character plus the following consonant
        my ($first, $second) = $field =~ /^(.).*?([bcdfghjklmnpqrstvwxyz])/i;
        if (!defined $first || !defined $second) {
            # no consonant after the first character
            ($first, $second) = $field =~ /^(.)(.)/;
        }
        return "$first$second";
    }

    # several components: initials of the first two
    my @initials = @parts[0, 1];
    foreach my $part (@initials) {
        $part =~ s/^(.).*$/$1/;
    }
    return join ("", @initials);
}
358
# Change one part of a colliding directory name and return the new name.
#
#   $namehash - tables 'index', 'subcollection' and 'languages', each
#               mapping a short name to the full name it was created for
#   $index    - the index description the name is wanted for
#   $indexref, $subref, $langref
#             - references to the three short names that make up the name
#
# The parts are examined in the order index, subcollection, languages. The
# first short name that is registered for a different full name is advanced
# with get_next_version (through the reference); the others are left alone.
# Returns the concatenation of the three short names.
# NOTE(review): when no part differs, nothing is changed and the same name
# is returned - confirm callers cannot loop on that.
sub make_unique {
    my ($self, $namehash, $index, $indexref, $subref, $langref) = @_;
    my ($level, $gran, $subcollection, $languages) = split (":", $index);

    # [table, reference to the short name, full name it should belong to]
    my @candidates = (
        ['index',         $indexref, "$level:$gran"],
        ['subcollection', $subref,   $subcollection],
        ['languages',     $langref,  $languages],
    );

    foreach my $candidate (@candidates) {
        my ($table, $nameref, $owner) = @$candidate;
        if ($namehash->{$table}->{$$nameref} ne $owner) {
            $self->get_next_version ($nameref);
            last;
        }
    }

    return "$$indexref$$subref$$langref";
}
373
# Advance a short name to its next version, in place.
#
# $nameref is a reference to the name.
#   - a name ending in one or two digits has that number incremented
#     ("d0" -> "d1", "d9" -> "d10", "d10" -> "d11")
#   - any other name has its last character replaced by "0" ("dt" -> "d0")
sub get_next_version {
    my ($self, $nameref) = @_;

    if ($$nameref =~ /(\d\d?)$/) {
        # one pattern covers both the one and the two digit case, so a
        # trailing single "9" is correctly carried over to "10"
        my $num = $1;
        $num++;
        $$nameref =~ s/\d\d?$/$num/;
    } else {
        $$nameref =~ s/.$/0/;
    }
}
389
# Build one index.
#
# $index is the index description ("level:fields[:subcollection...]"); its
# directory name is looked up in $self->{'index_mapping'}, which therefore
# has to exist already.
#
# The documents are piped through mg_passes twice (-N1 to create the index
# dictionary, -N2 to invert the text); mg_perf_hash_build,
# mg_weights_build, mg_invf_dict and mg_stem_idx are run around those
# passes. Afterwards every file in the index directory whose suffix is not
# listed in %wanted_index_files is deleted.
#
# Dies when an executable does not exist, when a pipe to mg_passes cannot be
# opened or when the index directory cannot be read.
sub build_index {
    my $self = shift (@_);
    my ($index) = @_;
    my $verbose = ($self->{'verbosity'} >= 1);

    # get the full index directory path and make sure it exists
    my $indexdir = $self->{'index_mapping'}->{$index};
    &util::mk_all_dir (&util::filename_cat($self->{'build_dir'}, $indexdir));
    my $fullindexprefix = &util::filename_cat ($self->{'build_dir'}, $indexdir,
                                               $self->{'collection'});
    my $fulltextprefix = &util::filename_cat ($self->{'build_dir'}, "text",
                                              $self->{'collection'});

    # get any os specific stuff
    my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $exe = &util::get_os_exe ();
    my $mg_passes_exe = &util::filename_cat($exedir, "mg_passes$exe");
    my $mg_perf_hash_build_exe =
        &util::filename_cat($exedir, "mg_perf_hash_build$exe");
    my $mg_weights_build_exe =
        &util::filename_cat ($exedir, "mg_weights_build$exe");
    my $mg_invf_dict_exe =
        &util::filename_cat ($exedir, "mg_invf_dict$exe");
    my $mg_stem_idx_exe =
        &util::filename_cat ($exedir, "mg_stem_idx$exe");

    my $osextra = "";
    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
        $fullindexprefix =~ s/\//\\/g;
    } else {
        $osextra = " -d /";
    }

    # get the index level from the index description
    # the index will be level 2 unless we are building a
    # paragraph level index
    my $index_level = 2;
    $index_level = 3 if $index =~ /^paragraph/i;

    # get the index expression if this index belongs
    # to a subcollection
    my $indexexparr = [];
    my ($level, $fields, $subcollection) = split (":", $index);
    my @subcollections = ();
    @subcollections = split /,/, $subcollection if (defined $subcollection);

    foreach my $subname (@subcollections) {
        if (defined ($self->{'collect_cfg'}->{'subcollection'}->{$subname})) {
            push (@$indexexparr, $self->{'collect_cfg'}->{'subcollection'}->{$subname});
        }
    }

    # add expressions for languages
    # NOTE(review): the language part of $index is not looked at here; an
    # expression is added for every configured language - confirm intended.
    foreach my $entry (@{$self->{'collect_cfg'}->{'languages'} || []}) {
        # work on a copy: the loop variable is an alias of the configuration
        # entry, so stripping the "!" from it directly would rewrite
        # collect_cfg and lose the negation for every index built later
        my $language = $entry;
        if ($language =~ s/^\!//) {
            push (@$indexexparr, "!Language/$language/");
        } else {
            push (@$indexexparr, "Language/$language/");
        }
    }

    # set up the document processor
    $self->{'buildproc'}->set_output_handle ('mgbuilder::PIPEOUT');
    $self->{'buildproc'}->set_mode ('text');
    $self->{'buildproc'}->set_index ($index, $indexexparr);
    $self->{'buildproc'}->set_indexing_text (1);

    # Build index dictionary. Uses verbatim stem method
    print STDERR "\n creating index dictionary\n" if $verbose;
    if (!-e "$mg_passes_exe" || !open (PIPEOUT,
            "| $mg_passes_exe -f $fullindexprefix -b $maxdocsize " .
            "-$index_level -m 32 -s 0 -G -t 10 -N1 $osextra")) {
        die "mgbuilder::build_index - couldn't run $mg_passes_exe\n";
    }
    $self->{'buildproc'}->reset();
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
    close (PIPEOUT);

    # create the perfect hash function
    if (!-e "$mg_perf_hash_build_exe") {
        die "mgbuilder::build_index - couldn't run $mg_perf_hash_build_exe\n";
    }
    system ("$mg_perf_hash_build_exe -f $fullindexprefix $osextra");

    # invert the text
    print STDERR "\n inverting the text\n" if $verbose;
    if (!-e "$mg_passes_exe" || !open (PIPEOUT,
            "| $mg_passes_exe -f $fullindexprefix -b $maxdocsize " .
            "-$index_level -c 3 -G -t 10 -N2 $osextra")) {
        die "mgbuilder::build_index - couldn't run $mg_passes_exe\n";
    }
    $self->{'buildproc'}->reset();
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
    close (PIPEOUT);

    # create the weights file
    print STDERR "\n create the weights file\n" if $verbose;
    if (!-e "$mg_weights_build_exe") {
        die "mgbuilder::build_index - couldn't run $mg_weights_build_exe\n";
    }
    system ("$mg_weights_build_exe -f $fullindexprefix -t $fulltextprefix $osextra");

    # create 'on-disk' stemmed dictionary
    print STDERR "\n creating 'on-disk' stemmed dictionary\n" if $verbose;
    if (!-e "$mg_invf_dict_exe") {
        die "mgbuilder::build_index - couldn't run $mg_invf_dict_exe\n";
    }
    system ("$mg_invf_dict_exe -f $fullindexprefix $osextra");

    # creates stem index files for the various stemming methods
    print STDERR "\n creating stem indexes\n" if $verbose;
    if (!-e "$mg_stem_idx_exe") {
        die "mgbuilder::build_index - couldn't run $mg_stem_idx_exe\n";
    }
    foreach my $method (1 .. 3) {
        system ("$mg_stem_idx_exe -b 4096 -s$method -f $fullindexprefix $osextra");
    }

    # remove unwanted files
    my $tmpdir = &util::filename_cat ($self->{'build_dir'}, $indexdir);
    opendir (DIR, $tmpdir) || die
        "mgbuilder::build_index - couldn't read directory $tmpdir\n";
    foreach my $file (readdir(DIR)) {
        next if $file =~ /^\./;
        my ($suffix) = $file =~ /\.([^\.]+)$/;
        if (defined $suffix && !defined $wanted_index_files{$suffix}) {
            # delete it!
            &util::rm (&util::filename_cat ($tmpdir, $file));
        }
    }
    closedir (DIR);
}
528
# Create the info database <build_dir>/text/<collection>.ldb (.bdb when
# util::is_little_endian is false) by piping text into txt2db.
#
# Written to the pipe, in this order: a "[collection]" record built from the
# collectionmeta entries of collect.cfg (when there are any), whatever the
# document processor outputs in 'infodb' mode, and the classification
# information. Dies when txt2db does not exist or the pipe cannot be opened.
sub make_infodatabase {
    my ($self) = @_;
    my $verbose = ($self->{'verbosity'} >= 1);

    my $textdir = &util::filename_cat($self->{'build_dir'}, "text");
    &util::mk_all_dir ($textdir);

    # get db name
    my $dbext = &util::is_little_endian() ? ".ldb" : ".bdb";
    my $fulldbname = &util::filename_cat ($textdir, "$self->{'collection'}$dbext");
    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
        $fulldbname =~ s/\//\\/g;
    }

    my $bindir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $suffix = &util::get_os_exe ();
    my $txt2db_cmd = &util::filename_cat($bindir, "txt2db$suffix");

    print STDERR "\n*** creating the info database\n" if $verbose;

    # init all the classifiers
    &classify::init_classifiers ($self->{'classifiers'});

    # set up the document processor
    my $buildproc = $self->{'buildproc'};
    $buildproc->set_output_handle ('mgbuilder::PIPEOUT');
    $buildproc->set_mode ('infodb');
    $buildproc->set_classifiers ($self->{'classifiers'});
    $buildproc->set_indexing_text (0);

    # create the infodatabase
    unless (-e "$txt2db_cmd" && open (PIPEOUT,
            "| $txt2db_cmd $fulldbname")) {
        die "mgbuilder::make_infodatabase - couldn't run $txt2db_cmd\n";
    }
    $buildproc->reset();

    my $collectionmeta = $self->{'collect_cfg'}->{'collectionmeta'};
    if (defined $collectionmeta) {

        # names starting with "." refer to indexes, so the mapping is needed
        if (!defined $self->{'index_mapping'}) {
            $self->{'index_mapping'} =
                $self->create_index_mapping ($self->{'collect_cfg'}->{'indexes'});
        }

        print PIPEOUT "[collection]\n";

        foreach my $key (keys (%$collectionmeta)) {
            my $value = $collectionmeta->{$key};

            # ordinary entries are written under their own name
            if ($key !~ /^\./) {
                print PIPEOUT "<$key>$value\n";
                next;
            }

            # ".<index description>" is written under the directory name
            # of that index
            my $indexname = substr ($key, 1);
            my $dirname = $self->{'index_mapping'}->{$indexname};
            if (defined $dirname) {
                print PIPEOUT "<$dirname>" . $value . "\n";
            } else {
                print STDERR "mgbuilder: warning bad collectionmeta option '$indexname' - ignored\n";
            }
        }
        print PIPEOUT "\n" . ('-' x 70) . "\n";
    }

    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, $buildproc, $self->{'maxdocs'});

    # output classification information
    &classify::output_classify_info ($self->{'classifiers'}, 'mgbuilder::PIPEOUT',
                                     $self->{'allclassifications'});

    close (PIPEOUT);
}
597
# Write <build_dir>/build.cfg.
#
# The file records the build date, the number of documents and bytes
# reported by the document processor, the index, subcollection and language
# maps taken from $self->{'index_mapping'} (entries of the form
# "name->shortname") and the list of indexes that were not built.
sub make_auxiliary_files {
    my ($self) = @_;

    # Collected in a lexical hash so that nothing is carried over from an
    # earlier call; write_cfg_file is given a reference to it.
    my $build_cfg = {};

    print STDERR "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);

    # make sure the build directory exists
    &util::mk_all_dir ($self->{'build_dir'});

    # store the build date
    $build_cfg->{'builddate'} = time;

    # store the number of documents and number of bytes
    $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();

    # store the mapping between the index names and the directory names
    my $mapping = $self->{'index_mapping'};

    my @indexmap = ();
    foreach my $index (@{$mapping->{'indexmaporder'}}) {
        push (@indexmap, $index . "->" . $mapping->{'indexmap'}->{$index});
    }
    $build_cfg->{'indexmap'} = \@indexmap;

    # the subcollection and language maps are only written when not empty
    my @subcollectionmap = ();
    foreach my $subcollection (@{$mapping->{'subcollectionmaporder'}}) {
        push (@subcollectionmap, $subcollection . "->" .
              $mapping->{'subcollectionmap'}->{$subcollection});
    }
    $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);

    my @languagemap = ();
    foreach my $language (@{$mapping->{'languagemaporder'}}) {
        push (@languagemap, $language . "->" .
              $mapping->{'languagemap'}->{$language});
    }
    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);

    $build_cfg->{'notbuilt'} = $self->{'notbuilt'};

    # write out the build information
    &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg,
                             '^(builddate|numdocs|numbytes)$', '^(indexmap|subcollectionmap|languagemap|notbuilt)$');
}
643
# Placeholder: takes the object off the argument list and does nothing else.
sub deinit {
    my $self = shift;
}
647
648
6491;
650
651
Note: See TracBrowser for help on using the repository browser.