source: main/trunk/greenstone2/perllib/buildConfigxml.pm@ 22485

Last change on this file since 22485 was 22485, checked in by ak19, 11 years ago
  1. Dr Bainbridge fixed the database perl modules to all have the method read_info_keys (which reads the keys from the database into a map), so that dbutil.pm can have the same as a generic method. 2. buildConfigxml.pm only writes out the defaultIndex if it is set (to prevent an Uninitialised Variable warning message from Perl).
  • Property svn:keywords set to Author Date Id Revision
File size: 18.1 KB
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in buildConfig.xml
27# Note, only implemented the bits that are currently used, eg by incremental
28# build code.
29# The resulting data is not a full representation on buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
37# Wrapper that ensures the right version of XML::Parser is loaded given
38# the version of Perl being used. Need to distinguish between Perl 5.6 and
39# Perl 5.8
40sub BEGIN {
41 my $perl_dir;
42
43 # Note: $] encodes the version number of perl
44 if ($]>=5.010) {
45 $perl_dir = "perl-5.10";
46 }
47 elsif ($]>5.008) {
48 # perl 5.8.1 or above
49 $perl_dir = "perl-5.8";
50 }
51 elsif ($]<5.008) {
52 # assume perl 5.6
53 $perl_dir = "perl-5.6";
54 }
55 else {
56 print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
57 print STDERR " Please upgrade to a newer version of Perl.\n";
58 $perl_dir = "perl-5.8";
59 }
60
61 if ($ENV{'GSDLOS'} !~ /^windows$/i) {
62 # Use push to put this on the end, so an existing XML::Parser will be used by default
63 push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
64 }
65}
66
67use XML::Parser;
68
69
70# A mapping hash to resolve name discrepancy between gs2 and gs3.
71my $nameMap = {"numDocs" => "numdocs",
72 "buildType" => "buildtype"
73 };
74
75
76# A hash structure which is returned by sub read_cfg_file.
77my $data = {};
78
79# use those unique attribute values to locate the text within the elements
80my $currentLocation = "";
81my $stringexp = q/^(buildType|numDocs)$/;
82
83my $indexmap_name = "";
84my $haveindexfields = 0;
85
86# Reads in the model collection configuration file, collectionConfig.xml,
87# into a structure which complies with the one used by gs2 (i.e. one read
88# in by &cfgread::read_cfg_file).
89sub read_cfg_file {
90 my ($filename) = @_;
91 $data = {};
92 if ($filename !~ /buildConfig\.xml$/ || !-f $filename) {
93 return undef;
94 }
95
96 # create XML::Parser object for parsing metadata.xml files
97 my $parser;
98 if ($]<5.008) {
99 # Perl 5.6
100 $parser = new XML::Parser('Style' => 'Stream',
101 'Handlers' => {'Char' => \&Char,
102 'Doctype' => \&Doctype
103 });
104 }
105 else {
106 # Perl 5.8
107 $parser = new XML::Parser('Style' => 'Stream',
108 'ProtocolEncoding' => 'ISO-8859-1',
109 'Handlers' => {'Char' => \&Char,
110 'Doctype' => \&Doctype
111 });
112 }
113
114 if (!open (COLCFG, $filename)) {
115 print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
116 } else {
117
118 $parser->parsefile ($filename);# (COLCFG);
119 close (COLCFG);
120 }
121
122 #&Display;
123 return $data;
124}
125
126sub StartTag {
127# Those marked with #@ will not be executed at the same time when this sub is being called
128# so that if/elsif is used to avoid unnecessary tests
129 my ($expat, $element) = @_;
130
131 my $name = $_{'name'};
132 my $shortname = $_{'shortname'};
133
134
135 #@ handling block metadataList
136 if (defined $name and $name =~ /$stringexp/){
137 $currentLocation = $name;
138 # the value will be retrieved later in Text sub
139 }
140
141 #@ handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene)
142 elsif ($element =~ /^indexList$/) {
143 # set up the data arrays
144 # this assumes that the build type has been read already, which is
145 # currently the order we save the file in.
146 if ($data->{'buildtype'} eq "mg") {
147 $indexmap_name = "indexmap";
148 if (!defined $data->{"indexmap"}) {
149 $data->{"indexmap"} = [];
150 }
151 }
152 else {
153 $indexmap_name = "indexfieldmap";
154 $haveindexfields = 1;
155 if (!defined $data->{"indexfieldmap"}) {
156 $data->{"indexfieldmap"} = [];
157 }
158 if (!defined $data->{"indexfields"}) {
159 $data->{"indexfields"} = [];
160 }
161
162 }
163
164 }
165
166 elsif ($element =~ /index/) {
167 # store each index in the map
168 if (defined $name && defined $shortname) {
169 push @{$data->{$indexmap_name}}, "$name->$shortname";
170 if ($haveindexfields) {
171 push @{$data->{'indexfields'}}, $name;
172 }
173 }
174 }
175
176
177}
178
179sub EndTag {
180 my ($expat, $element) = @_;
181}
182
183sub Text {
184 if (defined $currentLocation) {
185 #@ Handling block metadataList(numDocs, buildType)
186 if($currentLocation =~ /$stringexp/){
187 #print $currentLocation;
188 my $key = $nameMap->{$currentLocation};
189 $data->{$key} = $_;
190 undef $currentLocation;
191 }
192 }
193}
194
195# This sub is for debugging purposes
196sub Display {
197
198 print "NumDocs = ".$data->{'numdocs'}."\n" if (defined $data->{'numdocs'});
199 print "BuildType = ".$data->{'buildtype'}."\n" if (defined $data->{'buildtype'});
200 print "IndexMap = ". join(" ",@{$data->{'indexmap'}})."\n" if (defined $data->{'indexmap'});
201 print "IndexFieldMap = ". join(" ",@{$data->{'indexfieldmap'}})."\n" if (defined $data->{'indexfieldmap'});
202 print "IndexFields = ". join(" ",@{$data->{'indexfields'}})."\n" if (defined $data->{'indexfields'});
203
204}
205
206# is this actually used??
207sub Doctype {
208 my ($expat, $name, $sysid, $pubid, $internal) = @_;
209
210 die if ($name !~ /^buildConfig$/);
211}
212
213# This Char function overrides the one in XML::Parser::Stream to overcome a
214# problem where $expat->{Text} is treated as the return value, slowing
215# things down significantly in some cases.
216sub Char {
217 if ($]<5.008) {
218 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
219 }
220 $_[0]->{'Text'} .= $_[1];
221 return undef;
222}
223
224
225
226sub write_line {
227 my ($filehandle, $line) = @_;
228 print $filehandle join ("", @$line), "\n";
229}
230
231# Create the buildConfig.xml file for a specific collection
232sub write_cfg_file {
233 # this sub is called in make_auxiliary_files() in basebuilder.pm
234 # the received args: $buildoutfile - destination file: buildConfig.xml
235 # $buildcfg - all build options,
236 # $collectcfg - contents of collectionConfig.xml read in by read_cfg_file sub in buildConfigxml.pm.
237 my ($buildoutfile, $buildcfg, $collectcfg) = @_;
238 my $line = [];
239
240 if (!open (COLCFG, ">$buildoutfile")) {
241 print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
242 die;
243 }
244
245 &write_line('COLCFG', ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);
246
247 # output building metadata to build config file
248 my $buildtype;
249 if (defined $buildcfg->{"buildtype"}) {
250 $buildtype = $buildcfg->{"buildtype"};
251 } else {
252 $buildtype = "mgpp";
253 }
254 my $numdocs;
255 if (defined $buildcfg->{"numdocs"}) {
256 $numdocs = $buildcfg->{"numdocs"};
257 }
258 &write_line('COLCFG', ["<metadataList>"]);
259 &write_line('COLCFG', ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
260 &write_line('COLCFG', ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
261 if (defined $buildcfg->{'indexstem'}) {
262 &write_line('COLCFG', ["<metadata name=\"indexStem\">", $buildcfg->{"indexstem"}, "</metadata>"]);
263 }
264 if (defined $buildcfg->{'infodbtype'}) {
265 &write_line('COLCFG', ["<metadata name=\"infodbType\">", $buildcfg->{"infodbtype"}, "</metadata>"]);
266 }
267 &write_line('COLCFG', ["</metadataList>"]);
268
269 my $service_type = "MGPP";
270 if ($buildtype eq "mg") {
271 $service_type = "MG";
272 } elsif ($buildtype eq "lucene") {
273 $service_type = "Lucene";
274 }
275
276 # output serviceRackList
277 &write_line('COLCFG', ["<serviceRackList>"]);
278
279 # do the search service
280 &write_line('COLCFG', ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
281 if (defined $buildcfg->{'indexstem'}) {
282 my $indexstem = $buildcfg->{'indexstem'};
283 &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
284 }
285 if (defined $buildcfg->{'infodbtype'}) {
286 my $infodbtype = $buildcfg->{'infodbtype'};
287 &write_line('COLCFG', ["<databaseType name=\"", $infodbtype, "\" />"]);
288 }
289
290 #indexes
291 # maps index name to shortname
292 my $indexmap = {};
293 # keeps the order for indexes
294 my @indexlist = ();
295
296 my $defaultindex = "";
297 my $first = 1;
298 my $maptype = "indexfieldmap";
299 if ($buildtype eq "mg") {
300 $maptype = "indexmap";
301 }
302
303 #map {print $_."\n"} keys %$buildcfg;
304
305 if (defined $buildcfg->{$maptype}) {
306 my $indexmap_t = $buildcfg->{$maptype};
307 foreach my $i (@$indexmap_t) {
308 my ($k, $v) = $i =~ /^(.*)\-\>(.*)$/;
309 $indexmap->{$k} = $v;
310 push @indexlist, $k;
311 if ($first) {
312 $defaultindex = $v;
313 $first = 0;
314 }
315 }
316 # now if the user has assigned a default index, we use it
317 if (defined $collectcfg->{"defaultindex"}) {
318 $defaultindex = $indexmap->{$collectcfg->{"defaultindex"}};
319 }
320
321 } else {
322 print STDERR "$maptype not defined";
323 }
324 #for each index in indexList, write them out
325 &write_line('COLCFG', ["<indexList>"]);
326 foreach my $i (@indexlist) {
327 my $index = $indexmap->{$i};
328 &write_line('COLCFG', ["<index name=\"", $i, "\" ", "shortname=\"", $index, "\" />"]);
329 }
330 &write_line('COLCFG', ["</indexList>"]);
331
332
333 #$defaultindex = "ZZ" if (!$defaultindex); # index allfields by default
334 if($defaultindex) {
335 &write_line('COLCFG', ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
336 }
337
338
339 # do indexOptionList
340 if ($buildtype eq "mg" || $buildtype eq "mgpp") {
341 &write_line('COLCFG', ["<indexOptionList>"]);
342 my $stemindexes = 3; # default is stem and casefold
343 if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
344 $stemindexes = $buildcfg->{'stemindexes'};
345 }
346 &write_line('COLCFG', ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);
347
348 my $maxnumeric = 4; # default
349 if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
350 $maxnumeric = $buildcfg->{'maxnumeric'};
351 }
352 &write_line('COLCFG', ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);
353 &write_line('COLCFG', ["</indexOptionList>"]);
354 }
355
356 # levelList
357 my $levelmap = {};
358 my @levellist = ();
359 my $default_search_level = "Doc";
360 my $default_retrieve_level = "Doc";
361 my $default_db_level = "Doc";
362 $first = 1;
363 if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
364 if (defined $buildcfg->{'levelmap'}) {
365 my $levelmap_t = $buildcfg->{'levelmap'};
366 foreach my $l (@$levelmap_t) {
367 my ($key, $val) = $l =~ /^(.*)\-\>(.*)$/;
368 $levelmap->{$key} = $val;
369 push @levellist, $key;
370 if ($first) {
371 # let default search level follow the first level in the level list
372 $default_search_level = $val;
373 # retrieve/database levels may get modified later if text level is defined
374 $default_retrieve_level = $val;
375 $default_db_level = $val;
376 $first = 0;
377 }
378 }
379 }
380 # the default level assigned by the user is no longer ignored [Shaoqun], but the retrievel level stays the same.
381 #if (defined $collectcfg->{"defaultlevel"}) {
382 $default_search_level = $levelmap->{$collectcfg->{"defaultlevel"}};
383 # $default_retrieve_level = $default_search_level;
384 #}
385
386 if (defined $buildcfg->{'textlevel'}) {
387 # let the retrieve/database levels always follow the textlevel
388 $default_retrieve_level = $buildcfg->{'textlevel'};
389 $default_db_level = $buildcfg->{'textlevel'};
390
391 }
392 }
393 #for each level in levelList, write them out
394 if ($buildtype ne "mg") {
395 &write_line('COLCFG', ["<levelList>"]);
396 foreach my $lv (@levellist) {
397 my $level = $levelmap->{$lv};
398 &write_line('COLCFG', ["<level name=\"", $lv, "\" shortname=\"", $level, "\" />"]);
399 }
400 &write_line('COLCFG', ["</levelList>"]);
401 }
402 # add in defaultLevel as the same level as indexLevelList, making the reading job easier
403 if ($buildtype eq "lucene" || $buildtype eq "mgpp") {
404 &write_line('COLCFG', ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
405 }
406 if ($buildtype eq "lucene" || $buildtype eq "mgpp") {
407 &write_line('COLCFG', ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);
408 }
409 # do searchTypeList
410 if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
411 &write_line('COLCFG', ["<searchTypeList>"]);
412
413 if (defined $buildcfg->{"searchtype"}) {
414 my $searchtype_t = $buildcfg->{"searchtype"};
415 foreach my $s (@$searchtype_t) {
416 &write_line('COLCFG', ["<searchType name=\"", $s, "\" />"]);
417 }
418 } else {
419 &write_line('COLCFG', ["<searchType name=\"plain\" />"]);
420 &write_line('COLCFG', ["<searchType name=\"form\" />"]);
421 }
422 &write_line('COLCFG', ["</searchTypeList>"]);
423 }
424
425 # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
426 $first = 1;
427 my $default_lang = "";
428 my $default_lang_short = "";
429 if (defined $buildcfg->{"languagemap"}) {
430 &write_line('COLCFG', ["<indexLanguageList>"]);
431
432 my $langmap_t = $buildcfg->{"languagemap"};
433 foreach my $l (@$langmap_t) {
434 my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
435
436 &write_line('COLCFG', ["<indexLanguage name=\"", $k, "\" shortname=\"", $v, "\" />"]);
437 if ($first) {
438 $default_lang = $k; #name
439 $default_lang_short = $v; #shortname
440 $first = 0;
441 }
442 }
443
444 &write_line('COLCFG', ["</indexLanguageList>"]);
445 # now if the user has assigned a default language (as "en", "ru" etc.)
446 if (defined $collectcfg->{"defaultlanguage"}) {
447 $default_lang = $collectcfg->{"defaultlanguage"};
448 }
449 &write_line('COLCFG', ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
450 }
451
452
453 # do indexSubcollectionList
454 my $default_subcol = "";# make it in sub scope to be used in the concatenation
455 if (defined $buildcfg->{'subcollectionmap'}) {
456 &write_line('COLCFG', ["<indexSubcollectionList>"]);
457 my $subcolmap = {};
458 my @subcollist = ();
459 $first = 1;
460 my $subcolmap_t = $buildcfg->{'subcollectionmap'};
461 foreach my $l (@$subcolmap_t) {
462 my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
463 $subcolmap->{$k} = $v;
464 push @subcollist, $k;
465 if ($first) {
466 $default_subcol = $v;
467 $first = 0;
468 }
469 }
470 foreach my $sl (@subcollist) {
471 my $subcol = $subcolmap->{$sl};
472 &write_line('COLCFG', ["<indexSubcollection name=\"", $sl, "\" shortname=\"", $subcol, "\" />"]);
473 }
474
475 &write_line('COLCFG', ["</indexSubcollectionList>"]);
476 &write_line('COLCFG', ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
477 }
478
479 # close off search service
480 &write_line('COLCFG', ["</serviceRack>"]);
481
482 # do the retrieve service
483 &write_line('COLCFG', ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);
484
485 # do default index
486 if (defined $buildcfg->{"languagemap"}) {
487 &write_line('COLCFG', ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
488 }
489 if (defined $buildcfg->{'subcollectionmap'}) {
490 &write_line('COLCFG', ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
491 }
492 if ($buildtype eq "mg") {
493 &write_line('COLCFG', ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
494 }
495
496 if (defined $buildcfg->{'indexstem'}) {
497 my $indexstem = $buildcfg->{'indexstem'};
498 &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
499 }
500 if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
501 &write_line('COLCFG', ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
502 }
503 if (defined $buildcfg->{'infodbtype'}) {
504 my $infodbtype = $buildcfg->{'infodbtype'};
505 &write_line('COLCFG', ["<databaseType name=\"", $infodbtype, "\" />"]);
506 }
507
508 &write_line('COLCFG', ["</serviceRack>"]);
509
510 # do the browse service
511 my $count = 1;
512 my $phind = 0;
513 my $started_classifiers = 0;
514
515 my $classifiers = $collectcfg->{"classify"};
516 foreach my $cl (@$classifiers) {
517 my $name = "CL$count";
518 $count++;
519 my ($classname) = @$cl[0];
520 if ($classname =~ /^phind$/i) {
521 $phind=1;
522 #should add it into coll config classifiers
523 next;
524 }
525
526 if (not $started_classifiers) {
527 &write_line('COLCFG', ["<serviceRack name=\"GS2Browse\">"]);
528 if (defined $buildcfg->{'indexstem'}) {
529 my $indexstem = $buildcfg->{'indexstem'};
530 &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
531 }
532 if (defined $buildcfg->{'infodbtype'}) {
533 my $infodbtype = $buildcfg->{'infodbtype'};
534 &write_line('COLCFG', ["<databaseType name=\"", $infodbtype, "\" />"]);
535 }
536 &write_line('COLCFG', ["<classifierList>"]);
537 $started_classifiers = 1;
538 }
539 my $content = ''; #use buttonname first, then metadata
540 if ($classname eq "DateList") {
541 $content = "Date";
542 } else {
543 for (my $j=0; $j<scalar(@$cl); $j++) {
544 my $arg = @$cl[$j];
545 if ($arg eq "-buttonname"){
546 $content = @$cl[$j+1];
547 last;
548 } elsif ($arg eq "-metadata") {
549 $content = @$cl[$j+1];
550 }
551
552 }
553 }
554 &write_line('COLCFG', ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
555 }
556 if ($started_classifiers) {
557 # end the classifiers
558 &write_line('COLCFG', ["</classifierList>"]);
559 # close off the Browse service
560 &write_line('COLCFG', ["</serviceRack>"]);
561 }
562
563 # the phind classifier is a separate service
564 if ($phind) {
565 # if phind classifier
566 &write_line('COLCFG', ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
567 }
568
569
570 &write_line('COLCFG', ["</serviceRackList>"]);
571 &write_line('COLCFG', ["</buildConfig>"]);
572
573 close (COLCFG);
574 }
575
576
577#########################################################
578
5791;
Note: See TracBrowser for help on using the repository browser.