source: trunk/gsdl/perllib/mgbuilder.pm@ 677

Last change on this file since 677 was 677, checked in by sjboddie, 25 years ago

changes for new browsing support

  • Property svn:keywords set to Author Date Id Revision
File size: 21.5 KB
Line 
1###########################################################################
2#
3# mgbuilder.pm -- MGBuilder object
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package mgbuilder;
27
28use classify;
29use cfgread;
30use colcfg;
31use plugin;
32use util;
33
# Value handed to mg_passes through its -b option (maximum document size).
$maxdocsize = 12000;

# Suffixes of the files that build_index keeps in an index directory;
# files with any other suffix are removed once the index has been built.
%wanted_index_files = map { $_ => 1 }
    qw(td t idb ib1 ib2 ib3 i ip tiw wa);
46
47
# Constructs an mgbuilder object for one collection.
#
# Arguments: class name, collection name, source directory, build
# directory, verbosity, maxdocs and the allclassifications flag; each is
# stored under the key of the same name.
#
# The constructor reads $GSDLCOLLECTDIR/etc/collect.cfg, expands the
# configured indexes by subcollection and by language, loads the plugins
# and classifiers, and creates the build document processor (a collection
# specific "<collection>buildproc" if one exists, otherwise mgbuildproc).
#
# Dies if collect.cfg does not exist, if no plugins could be loaded, or if
# the document processor cannot be loaded or constructed.
sub new {
    my ($class, $collection, $source_dir, $build_dir,
        $verbosity, $maxdocs, $allclassifications) = @_;

    # create an mgbuilder object
    my $self = bless {'collection'=>$collection,
                      'source_dir'=>$source_dir,
                      'build_dir'=>$build_dir,
                      'verbosity'=>$verbosity,
                      'maxdocs'=>$maxdocs,
                      'allclassifications'=>$allclassifications,
                      'notbuilt'=>[]    # indexes not built
                     }, $class;

    # read in the collection configuration file
    my $colcfgname = "$ENV{'GSDLCOLLECTDIR'}/etc/collect.cfg";
    if (!-e $colcfgname) {
        die "mgbuilder::new - couldn't find collect.cfg for collection $collection\n";
    }
    $self->{'collect_cfg'} = &colcfg::read_collect_cfg ($colcfgname);

    # sort out subcollection indexes, then language subindexes: every
    # existing index description gets ":<qualifier>" appended for each
    # configured qualifier (qualifier-major order)
    foreach my $qualifier_key ('indexsubcollections', 'languages') {
        my $qualifiers = $self->{'collect_cfg'}->{$qualifier_key};
        next unless defined $qualifiers;

        my $base_indexes = $self->{'collect_cfg'}->{'indexes'} || [];
        my @expanded = ();
        foreach my $qualifier (@$qualifiers) {
            foreach my $index (@$base_indexes) {
                push (@expanded, "$index:$qualifier");
            }
        }
        $self->{'collect_cfg'}->{'indexes'} = \@expanded;
    }

    # get the list of plugins for this collection
    my @plugins = ();    # some good choice of plugins .... ????
    if (defined $self->{'collect_cfg'}->{'plugins'}) {
        @plugins = @{$self->{'collect_cfg'}->{'plugins'}};
    }

    # load all the plugins
    $self->{'pluginfo'} = &plugin::load_plugins ($collection, \@plugins);
    if (scalar(@{$self->{'pluginfo'}}) == 0) {
        print STDERR "No plugins were loaded.\n";
        die "\n";
    }

    # load all the classifiers; the config file is re-read line by line
    # because each "classify" line describes one classifier.
    # cfgread::read_cfg_line is given the handle by name, so COLCFG has to
    # stay a package filehandle.
    $self->{'classifiers'} = [];
    if (open (COLCFG, '<', $colcfgname)) {
        my $line;
        while (defined ($line = &cfgread::read_cfg_line('mgbuilder::COLCFG'))) {
            next unless scalar(@$line) >= 2;
            my $key = shift (@$line);
            next unless $key eq "classify";
            my $classinfo = &classify::load_classifier($line);
            push (@{$self->{'classifiers'}}, $classinfo)
                if defined $classinfo;
        }
        close (COLCFG);
    } else {
        print STDERR "mgbuilder::new couldn't read the cfg file $colcfgname\n";
        print STDERR " no classifiers were loaded\n";
    }

    # load up the document processor for building
    # if a buildproc class has been created for this collection, use it
    # otherwise, use the mg buildproc
    my ($buildprocdir, $buildproctype);
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}buildproc.pm") {
        $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildproctype = "${collection}buildproc";
    } else {
        $buildprocdir = "$ENV{'GSDLHOME'}/perllib";
        $buildproctype = "mgbuildproc";
    }
    require "$buildprocdir/$buildproctype.pm";

    # a class name held in a scalar can be used directly as the invocant,
    # so no string eval is needed; any error from the constructor
    # propagates to the caller as before
    $self->{'buildproc'} = $buildproctype->new ($collection, $source_dir,
                                                $build_dir, $verbosity);

    return $self;
}
145
# Prepares an empty build area: removes the build directory left by any
# earlier build, recreates it, and creates its "text" subdirectory.
sub init {
    my ($self) = @_;
    my $build_dir = $self->{'build_dir'};

    # remove any old builds and start again with an empty directory
    &util::rm_r ($build_dir);
    &util::mk_all_dir ($build_dir);

    # make the text directory
    &util::mk_all_dir ("$build_dir/text");
}
157
# Builds the compressed text for the collection in <build_dir>/text.
#
# $textindex is passed to the document processor's set_index.  The
# documents are piped through mg_passes twice (-T1 to collect statistics,
# -T2 to compress), with mg_compression_dict run in between.
#
# Dies if mg_passes or mg_compression_dict is missing, or if the pipe to
# mg_passes cannot be opened.
sub compress_text {
    my ($self, $textindex) = @_;

    my $bindir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $exe_suffix = &util::get_os_exe ();
    my $passes_cmd = &util::filename_cat ($bindir, "mg_passes$exe_suffix");
    my $dict_cmd = &util::filename_cat ($bindir, "mg_compression_dict$exe_suffix");

    &util::mk_all_dir (&util::filename_cat ($self->{'build_dir'}, "text"));
    my $textprefix = &util::filename_cat ($self->{'build_dir'},
                                          "text/$self->{'collection'}");

    # windows gets backslashed paths; everything else gets " -d /"
    my $on_windows = ($ENV{'GSDLOS'} =~ /^windows$/i);
    my $osextra = $on_windows ? "" : " -d /";
    $textprefix =~ s/\//\\/g if $on_windows;

    print STDERR "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);

    # set up the document processor
    my $buildproc = $self->{'buildproc'};
    $buildproc->set_output_handle ('mgbuilder::PIPEOUT');
    $buildproc->set_mode ('text');
    $buildproc->set_index ($textindex);
    $buildproc->set_indexing_text (0);

    # one complete mg_passes run: open the pipe, push every document
    # through the plugins into it, close the pipe
    my $feed_mg_passes = sub {
        my ($pass_flag) = @_;
        unless (-e "$passes_cmd" && open (PIPEOUT,
                "| $passes_cmd -f $textprefix -b $maxdocsize $pass_flag $osextra")) {
            die "mgbuilder::compress_text - couldn't run $passes_cmd\n";
        }
        $self->{'buildproc'}->reset();
        &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                       "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
        close (PIPEOUT);
    };

    # collect the statistics for the text
    # -b $maxdocsize sets the maximum document size to be 12 meg
    print STDERR "\n collecting text statistics\n" if ($self->{'verbosity'} >= 1);
    $feed_mg_passes->("-T1");

    # create the compression dictionary
    # the compression dictionary is built by assuming the stats are from a seed
    # dictionary (-S), if a novel word is encountered it is spelled out (-H),
    # and the resulting dictionary must be less than 5 meg with the most frequent
    # words being put into the dictionary first (-2 -k 5120)
    print STDERR "\n creating the compression dictionary\n" if ($self->{'verbosity'} >= 1);
    die "mgbuilder::compress_text - couldn't run $dict_cmd\n"
        unless -e "$dict_cmd";
    system ("$dict_cmd -f $textprefix -S -H -2 -k 5120 $osextra");

    # compress the text
    # -b $maxdocsize sets the maximum document size to be 12 meg
    print STDERR "\n compressing the text\n" if ($self->{'verbosity'} >= 1);
    $feed_mg_passes->("-T2");
}
220
# Decides whether an index should be built.
#
# Each entry of the collection's "dontbuild" list is used as a regular
# expression anchored at both ends of the index description.  On the first
# match the index's mapped directory name is appended to the object's
# 'notbuilt' list and 0 is returned; otherwise 1 is returned.
sub want_built {
    my ($self, $index) = @_;

    my $dontbuild = $self->{'collect_cfg'}->{'dontbuild'};
    return 1 unless defined $dontbuild;

    foreach my $pattern (@$dontbuild) {
        next unless $index =~ /^$pattern$/;
        push (@{$self->{'notbuilt'}}, $self->{'index_mapping'}->{$index});
        return 0;
    }

    return 1;
}
236
# Builds every index listed in the collection configuration, skipping
# those that want_built rejects.  The index description to directory name
# mapping is (re)created first and stored in $self->{'index_mapping'}.
sub build_indexes {
    my ($self) = @_;
    my $indexes = $self->{'collect_cfg'}->{'indexes'};

    # create the mapping between the index descriptions
    # and their directory names
    $self->{'index_mapping'} = $self->create_index_mapping ($indexes);

    # build each of the indexes
    foreach my $index (@$indexes) {
        unless ($self->want_built ($index)) {
            print STDERR "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1);
            next;
        }

        print STDERR "\n*** building index $index in subdirectory " .
            "$self->{'index_mapping'}->{$index}\n" if ($self->{'verbosity'} >= 1);
        $self->build_index ($index);
    }
}
256
# Creates directory names for each of the index descriptions.
#
# $indexes is a reference to a list of descriptions of the form
# "level:granularity[:subcollection[:languages]]".
#
# Returns a reference to a hash that maps each description to its
# directory name and also holds:
#   'indexmap', 'subcollectionmap', 'languagemap'
#       - original field => processed (abbreviated) name
#   'indexmaporder', 'subcollectionmaporder', 'languagemaporder'
#       - the original fields in first-seen order
#
# Dies if a directory name collision cannot be resolved.
sub create_index_mapping {
    my $self = shift (@_);
    my ($indexes) = @_;

    my %mapping = ();
    $mapping{'indexmaporder'} = [];
    $mapping{'subcollectionmaporder'} = [];
    $mapping{'languagemaporder'} = [];

    # dirnames is used to check for collisions. Start this off
    # with the mandatory directory names
    my %dirnames = ('text'=>'text',
                    'extra'=>'extra');

    # processed name => original field, one independent table per field
    # type (these must be real hash references: an empty string here would
    # make all three tables the same symbolic hash)
    my %pnames = ('index' => {}, 'subcollection' => {}, 'languages' => {});

    my %seen = ();
    foreach my $index (@$indexes) {
        # an exact repeat of an earlier description already has its
        # directory; trying to make it "unique" could never succeed
        next if $seen{$index}++;

        my ($level, $gran, $subcollection, $languages) = split (":", $index);

        # the directory name starts with the first character of the index level
        my ($pindex) = $level =~ /^(.)/;

        # next comes a processed version of the index
        $pindex .= $self->process_field ($gran);
        $pindex = lc ($pindex);

        # next comes a processed version of the subcollection if there is one.
        my $psub = $self->process_field ($subcollection);
        $psub = lc ($psub);

        # next comes a processed version of the language if there is one.
        my $plang = $self->process_field ($languages);
        $plang = lc ($plang);

        my $dirname = $pindex . $psub . $plang;

        # check to be sure all index names are unique
        while (defined ($dirnames{$dirname})) {
            my $candidate = $self->make_unique (\%pnames, $index, \$pindex, \$psub, \$plang);
            # if nothing changed the loop would spin forever
            die "mgbuilder::create_index_mapping - couldn't create a unique " .
                "directory name for index $index\n" if $candidate eq $dirname;
            $dirname = $candidate;
        }

        # store the mapping orders as well as the maps
        if (!defined $mapping{'indexmap'}{"$level:$gran"}) {
            $mapping{'indexmap'}{"$level:$gran"} = $pindex;
            push (@{$mapping{'indexmaporder'}}, "$level:$gran");
        }
        if ($psub =~ /\w/ && !defined ($mapping{'subcollectionmap'}{$subcollection})) {
            $mapping{'subcollectionmap'}{$subcollection} = $psub;
            push (@{$mapping{'subcollectionmaporder'}}, $subcollection);
        }
        if ($plang =~ /\w/ && !defined ($mapping{'languagemap'}{$languages})) {
            $mapping{'languagemap'}{$languages} = $plang;
            # record the same key that was just stored in 'languagemap'
            push (@{$mapping{'languagemaporder'}}, $languages);
        }
        $mapping{$index} = $dirname;
        $dirnames{$dirname} = $index;
        $pnames{'index'}{$pindex} = "$level:$gran";
        $pnames{'subcollection'}{$psub} = $subcollection;
        $pnames{'languages'}{$plang} = $languages;
    }

    return \%mapping;
}
320
# Returns a processed (abbreviated) version of a field.
#
# An undefined field, or one without any word character, gives "".
# A field with two or more comma separated components gives the first
# character of each of the first two components.  A field with a single
# component gives its first character followed by the next consonant in
# the field; if there is no such consonant the first two characters are
# used instead.
sub process_field {
    my ($self, $field) = @_;

    return "" if !defined ($field) || $field !~ /\w/;

    my @parts = split /,/, $field;
    if (scalar (@parts) >= 2) {
        my @initials = ();
        foreach my $part (@parts[0, 1]) {
            (my $initial = $part) =~ s/^(.).*$/$1/;
            push (@initials, $initial);
        }
        return join ("", @initials);
    }

    my ($first, $second) = $field =~ /^(.).*?([bcdfghjklmnpqrstvwxyz])/i;
    unless (defined $first && defined $second) {
        ($first, $second) = $field =~ /^(.)(.)/;
    }
    return "$first$second";
}
343
# Changes one component of a colliding directory name and returns the new
# name.
#
# $namehash holds, under 'index', 'subcollection' and 'languages', tables
# mapping processed names to the original fields that already use them.
# $index is the description being mapped; $indexref, $subref and $langref
# refer to its three processed name components, one of which is advanced
# in place with get_next_version.
#
# The first component whose processed name is already used by a different
# original field is the one advanced.  If none is, the index component is
# advanced, so that every call changes the name and a caller that loops
# until the name is free always makes progress.
sub make_unique {
    my $self = shift (@_);
    my ($namehash, $index, $indexref, $subref, $langref) = @_;
    my ($level, $gran, $subcollection, $languages) = split (":", $index);

    if ($namehash->{'index'}->{$$indexref} ne "$level:$gran") {
        $self->get_next_version ($indexref);
    } elsif ($namehash->{'subcollection'}->{$$subref} ne $subcollection) {
        $self->get_next_version ($subref);
    } elsif ($namehash->{'languages'}->{$$langref} ne $languages) {
        $self->get_next_version ($langref);
    } else {
        # nothing identifies the clashing component; still change something
        $self->get_next_version ($indexref);
    }
    return "$$indexref$$subref$$langref";
}
358
# Advances the name referred to by $nameref to its next version, in place.
#
#   - a name ending in two digits has that two digit number incremented
#   - a name ending in one digit has that digit incremented (9 becomes 10)
#   - any other name has its last character replaced by 0; if there is no
#     character to replace, 0 is appended
#
# Every call changes the name, which callers that loop until a name is
# free rely on.
sub get_next_version {
    my $self = shift (@_);
    my ($nameref) = @_;

    if ($$nameref =~ /(\d\d)$/) {
        my $num = $1; $num ++;
        $$nameref =~ s/\d\d$/$num/;
    } elsif ($$nameref =~ /(\d)$/) {
        # only one trailing digit is present here, so only one digit can
        # be replaced - including when 9 rolls over to 10
        my $num = $1 + 1;
        $$nameref =~ s/\d$/$num/;
    } else {
        $$nameref .= "0" unless $$nameref =~ s/.$/0/;
    }
}
374
# Builds one index.
#
# $index is an index description ("level:fields[:subcollection...]") whose
# directory name has already been stored in $self->{'index_mapping'}.
# The documents are piped through mg_passes twice (-N1 then -N2), with
# mg_perf_hash_build in between, followed by mg_weights_build,
# mg_invf_dict and three runs of mg_stem_idx.  Finally every file in the
# index directory whose suffix is not in %wanted_index_files is removed.
#
# Dies if one of the executables is missing, if the pipe to mg_passes
# cannot be opened, or if the index directory cannot be read.
sub build_index {
    my $self = shift (@_);
    my ($index) = @_;

    # get the full index directory path and make sure it exists
    my $indexdir = $self->{'index_mapping'}->{$index};
    &util::mk_all_dir (&util::filename_cat($self->{'build_dir'}, $indexdir));
    my $fullindexprefix = &util::filename_cat ($self->{'build_dir'}, $indexdir,
                                               $self->{'collection'});
    my $fulltextprefix = &util::filename_cat ($self->{'build_dir'}, "text",
                                              $self->{'collection'});

    # get any os specific stuff
    my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $exe = &util::get_os_exe ();
    my $mg_passes_exe = &util::filename_cat($exedir, "mg_passes$exe");
    my $mg_perf_hash_build_exe =
        &util::filename_cat($exedir, "mg_perf_hash_build$exe");
    my $mg_weights_build_exe =
        &util::filename_cat ($exedir, "mg_weights_build$exe");
    my $mg_invf_dict_exe =
        &util::filename_cat ($exedir, "mg_invf_dict$exe");
    my $mg_stem_idx_exe =
        &util::filename_cat ($exedir, "mg_stem_idx$exe");

    my $osextra = "";
    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
        $fullindexprefix =~ s/\//\\/g;
    } else {
        $osextra = " -d /";
    }

    # get the index level from the index description
    # the index will be level 2 unless we are building a
    # paragraph level index
    my $index_level = 2;
    $index_level = 3 if $index =~ /^paragraph/i;

    # get the index expression if this index belongs
    # to a subcollection
    my $indexexparr = [];
    my (undef, undef, $subcollection) = split (":", $index);
    my @subcollections = ();
    @subcollections = split /,/, $subcollection if (defined $subcollection);

    foreach my $subname (@subcollections) {
        my $expression = $self->{'collect_cfg'}->{'subcollection'}->{$subname};
        push (@$indexexparr, $expression) if defined $expression;
    }

    # add expressions for languages if this index belongs to
    # a language subcollection
    # NOTE(review): an expression is added for every configured language,
    # not just the one named in $index - confirm this is intended.
    foreach my $configured (@{$self->{'collect_cfg'}->{'languages'} || []}) {
        # work on a copy: the loop variable aliases the entry in
        # collect_cfg, and stripping the "!" there would lose the negation
        # for every index built afterwards
        my $language = $configured;
        if ($language =~ s/^\!//) {
            push (@$indexexparr, "!Language/$language/");
        } else {
            push (@$indexexparr, "Language/$language/");
        }
    }

    # set up the document processor
    $self->{'buildproc'}->set_output_handle ('mgbuilder::PIPEOUT');
    $self->{'buildproc'}->set_mode ('text');
    $self->{'buildproc'}->set_index ($index, $indexexparr);
    $self->{'buildproc'}->set_indexing_text (1);

    # one complete mg_passes run over all the documents
    my $run_passes = sub {
        my ($options) = @_;
        if (!-e "$mg_passes_exe" || !open (PIPEOUT,
                "| $mg_passes_exe -f $fullindexprefix -b $maxdocsize " .
                "$options $osextra")) {
            die "mgbuilder::build_index - couldn't run $mg_passes_exe\n";
        }
        $self->{'buildproc'}->reset();
        &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                       "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
        close (PIPEOUT);
    };

    # runs one of the stand-alone mg tools on the index
    my $run_tool = sub {
        my ($tool_exe, $arguments) = @_;
        if (!-e "$tool_exe") {
            die "mgbuilder::build_index - couldn't run $tool_exe\n";
        }
        system ("$tool_exe $arguments $osextra");
    };

    # Build index dictionary. Uses verbatim stem method
    print STDERR "\n creating index dictionary\n" if ($self->{'verbosity'} >= 1);
    $run_passes->("-$index_level -m 32 -s 0 -G -t 10 -N1");

    # create the perfect hash function
    $run_tool->($mg_perf_hash_build_exe, "-f $fullindexprefix");

    # invert the text
    print STDERR "\n inverting the text\n" if ($self->{'verbosity'} >= 1);
    $run_passes->("-$index_level -c 3 -G -t 10 -N2");

    # create the weights file
    print STDERR "\n create the weights file\n" if ($self->{'verbosity'} >= 1);
    $run_tool->($mg_weights_build_exe, "-f $fullindexprefix -t $fulltextprefix");

    # create 'on-disk' stemmed dictionary
    print STDERR "\n creating 'on-disk' stemmed dictionary\n" if ($self->{'verbosity'} >= 1);
    $run_tool->($mg_invf_dict_exe, "-f $fullindexprefix");

    # creates stem index files for the various stemming methods
    print STDERR "\n creating stem indexes\n" if ($self->{'verbosity'} >= 1);
    foreach my $stem_method (1, 2, 3) {
        $run_tool->($mg_stem_idx_exe, "-b 4096 -s$stem_method -f $fullindexprefix");
    }

    # remove unwanted files
    my $tmpdir = &util::filename_cat ($self->{'build_dir'}, $indexdir);
    opendir (DIR, $tmpdir) || die
        "mgbuilder::build_index - couldn't read directory $tmpdir\n";
    foreach my $file (readdir(DIR)) {
        next if $file =~ /^\./;
        my ($suffix) = $file =~ /\.([^\.]+)$/;
        if (defined $suffix && !defined $wanted_index_files{$suffix}) {
            # delete it!
            &util::rm (&util::filename_cat ($tmpdir, $file));
        }
    }
    closedir (DIR);
}
513
# Creates the collection's info database in <build_dir>/text by piping
# text into txt2db.
#
# The output consists of an optional "[collection]" record holding the
# collectionmeta settings, whatever the document processor writes for
# each document in 'infodb' mode, and the classification information.
# collectionmeta keys that start with "." name an index and are written
# under that index's directory name; unknown index names are reported on
# STDERR and ignored.
#
# Dies if txt2db is missing or the pipe to it cannot be opened.
sub make_infodatabase {
    my ($self) = @_;

    my $textdir = &util::filename_cat ($self->{'build_dir'}, "text");
    &util::mk_all_dir ($textdir);

    # get db name
    my $dbext = &util::is_little_endian() ? ".ldb" : ".bdb";
    my $fulldbname = &util::filename_cat ($textdir, "$self->{'collection'}$dbext");
    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
        $fulldbname =~ s/\//\\/g;
    }

    my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $exe = &util::get_os_exe ();
    my $txt2db_exe = &util::filename_cat ($exedir, "txt2db$exe");

    print STDERR "\n*** creating the info database\n" if ($self->{'verbosity'} >= 1);

    # init all the classifiers
    &classify::init_classifiers ($self->{'classifiers'});

    # set up the document processor
    my $buildproc = $self->{'buildproc'};
    $buildproc->set_output_handle ('mgbuilder::PIPEOUT');
    $buildproc->set_mode ('infodb');
    $buildproc->set_classifiers ($self->{'classifiers'});
    $buildproc->set_indexing_text (0);

    # create the infodatabase
    unless (-e "$txt2db_exe" && open (PIPEOUT, "| $txt2db_exe $fulldbname")) {
        die "mgbuilder::make_infodatabase - couldn't run $txt2db_exe\n";
    }
    $self->{'buildproc'}->reset();

    my $collectionmeta = $self->{'collect_cfg'}->{'collectionmeta'};
    if (defined $collectionmeta) {

        # the index names in the metadata are translated to directory
        # names, so the mapping has to exist by now
        if (!defined $self->{'index_mapping'}) {
            $self->{'index_mapping'} =
                $self->create_index_mapping ($self->{'collect_cfg'}->{'indexes'});
        }

        print PIPEOUT "[collection]\n";

        foreach my $metakey (keys (%$collectionmeta)) {
            my $metavalue = $collectionmeta->{$metakey};

            # ordinary metadata is written under its own name
            if ($metakey !~ /^\./) {
                print PIPEOUT "<$metakey>$metavalue\n";
                next;
            }

            # ".<index description>" is written under the index's directory
            my $indexname = substr ($metakey, 1);
            my $dirname = $self->{'index_mapping'}->{$indexname};
            if (defined $dirname) {
                print PIPEOUT "<$dirname>" . $metavalue . "\n";
            } else {
                print STDERR "mgbuilder: warning bad collectionmeta option '$indexname' - ignored\n";
            }
        }
        print PIPEOUT "\n" . ('-' x 70) . "\n";
    }

    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, $self->{'buildproc'}, $self->{'maxdocs'});

    # output classification information
    &classify::output_classify_info ($self->{'classifiers'}, 'mgbuilder::PIPEOUT',
                                     $self->{'allclassifications'});

    close (PIPEOUT);
}
582
# Does nothing in this class.  The value returned is the object itself
# (the first argument).
sub collect_specific {
    my ($self) = @_;
    return $self;
}
586
# Writes <build_dir>/build.cfg, which records the build date, the number
# of documents and bytes reported by the document processor, the
# "original->directory" mappings for indexes, subcollections and languages
# (the latter two only when non-empty) and the list of indexes that were
# not built.
sub make_auxiliary_files {
    my $self = shift (@_);

    # a fresh configuration for every call: nothing may be carried over
    # from an earlier call or from another builder object
    my $build_cfg = {};

    print STDERR "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);

    # make sure the build directory exists
    &util::mk_all_dir ($self->{'build_dir'});

    # store the build date
    $build_cfg->{'builddate'} = time;

    # store the number of documents and number of bytes
    $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();

    # store the mapping between the index names and the directory names
    my $mapping = $self->{'index_mapping'} || {};

    my @indexmap =
        map { $_ . "->" . $mapping->{'indexmap'}->{$_} }
        @{$mapping->{'indexmaporder'} || []};
    $build_cfg->{'indexmap'} = \@indexmap;

    my @subcollectionmap =
        map { $_ . "->" . $mapping->{'subcollectionmap'}->{$_} }
        @{$mapping->{'subcollectionmaporder'} || []};
    $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);

    my @languagemap =
        map { $_ . "->" . $mapping->{'languagemap'}->{$_} }
        @{$mapping->{'languagemaporder'} || []};
    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);

    $build_cfg->{'notbuilt'} = $self->{'notbuilt'};

    # write out the build information
    &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg,
                             '^(builddate|numdocs|numbytes)$',
                             '^(indexmap|subcollectionmap|languagemap|notbuilt)$');
}
633
# Does nothing in this class.  The value returned is the object itself
# (the first argument).
sub deinit {
    my ($self) = @_;
    return $self;
}
637
638
6391;
640
641
Note: See TracBrowser for help on using the repository browser.