source: main/trunk/greenstone2/perllib/buildConfigxml.pm@ 24193

Last change on this file since 24193 was 24070, checked in by ak19, 13 years ago

Fixed build error message about uninitialised variables, which was due to my having used uppercase characters in strings in instances where the config file parser is set to expect the same strings in all lower-case.

  • Property svn:keywords set to Author Date Id Revision
File size: 17.3 KB
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in buildConfig.xml
27# Note, only implemented the bits that are currently used, eg by incremental
28# build code.
29# The resulting data is not a full representation of buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
37use XMLParser;
38
39
# Translates the mixed-case metadata names found in buildConfig.xml (gs3)
# into the all-lower-case keys used in the hash handed back to gs2 code.
my $nameMap = {
    "numDocs"   => "numdocs",
    "buildType" => "buildtype",
};

# The hash that sub read_cfg_file fills in and returns.
my $data = {};

# Name attribute of the metadata element whose text is still awaited.
# Set by StartTag, consumed (and undefined again) by Text.
my $currentLocation = "";

# Pattern, kept as a string, that selects the metadata names whose text
# content is stored in $data.
my $stringexp = q/^(buildType|numDocs)$/;

# Key of $data that <index> elements are appended to; chosen by StartTag
# when it meets <indexList>.
my $indexmap_name = "";

# True when index names must also be collected under 'indexfields'.
my $haveindexfields = 0;
55
# Reads a build configuration file (buildConfig.xml) into a hash whose keys
# follow the gs2 naming used by &cfgread::read_cfg_file (numdocs, buildtype,
# indexmap / indexfieldmap, indexfields).
#
#   $filename - path of the file to read; its name must end in
#               "buildConfig.xml" and it must be an existing plain file.
#
# Returns a reference to the populated hash, or undef when $filename is
# undefined, is not named buildConfig.xml, or does not exist.  If the file
# exists but cannot be opened a message goes to STDERR and an empty hash is
# returned.  Errors raised by the XML parser itself are not caught here.
sub read_cfg_file {
    my ($filename) = @_;

    # Every piece of parser state lives in file-level variables shared with
    # the StartTag/Text handlers.  Reset all of it, not just $data, so that
    # nothing from a previous call (e.g. an mgpp file read before an mg one)
    # leaks into this parse.
    $data            = {};
    $currentLocation = "";
    $indexmap_name   = "";
    $haveindexfields = 0;

    if (!defined $filename || $filename !~ /buildConfig\.xml$/ || !-f $filename) {
        # Deliberately "return undef" rather than a bare return: existing
        # callers receive a single undef value even in list context.
        return undef;
    }

    # Removed ProtocolEncoding (see MetadataXMLPlugin for details)

    # Create the XML::Parser object that drives the StartTag/EndTag/Text
    # subs of this package while parsing buildConfig.xml.
    my $parser = XML::Parser->new('Style'    => 'Stream',
                                  'Pkg'      => 'buildConfigxml',
                                  'Handlers' => {'Char'    => \&Char,
                                                 'Doctype' => \&Doctype
                                                });

    # Opening the file ourselves is only a readability probe; the parser
    # re-opens it by name.  A lexical handle avoids clashing with any other
    # user of a global COLCFG handle.
    my $probe;
    if (!open($probe, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
    } else {
        close($probe);
        $parser->parsefile($filename);
    }

    #&Display;
    return $data;
}
86
# Stream-style handler invoked by XML::Parser for every opening tag.
#
#   $expat   - the parser object (unused here)
#   $element - name of the element being opened
#
# The attributes of the element are read from the global hash %_.
# The branches below are mutually exclusive for any one element, hence the
# if/elsif chain.
sub StartTag {
    my ($expat, $element) = @_;

    my $name      = $_{'name'};
    my $shortname = $_{'shortname'};

    # Metadata (numDocs, buildType): remember which value is coming; the
    # text itself is picked up later in sub Text.
    if (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
    }

    # Start of the index list: decide where <index> entries are stored -
    # indexmap (mg) or indexfieldmap plus indexfields (mgpp/lucene).
    elsif ($element eq 'indexList') {
        # This assumes that the build type has been read already, which is
        # currently the order we save the file in.  An absent build type is
        # treated like any non-mg type.
        my $buildtype = $data->{'buildtype'};
        if (defined $buildtype && $buildtype eq "mg") {
            $indexmap_name = "indexmap";
            # mg has no indexfields; clear the flag explicitly so a value
            # left over from an earlier parse cannot leak in.
            $haveindexfields = 0;
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }

    # A single index entry.  Match the element name exactly: a substring
    # test on "index" would also accept indexLanguage, indexSubcollection
    # and defaultIndexLanguage, which carry name/shortname attributes too
    # and would be stored as if they were indexes.
    elsif ($element eq 'index') {
        # Ignore entries seen before any <indexList> selected a target.
        if ($indexmap_name ne "" && defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
139
# Stream-style handler invoked by XML::Parser for every closing tag.
#
#   $expat   - the parser object (unused here)
#   $element - name of the element being closed (unused here)
sub EndTag {
    my ($expat, $element) = @_;

    # StartTag arms $currentLocation for a numDocs/buildType metadata
    # element and Text disarms it once the value has been stored.  The
    # Stream style only calls Text when there is text to deliver, so an
    # empty metadata element would stay armed and the next run of
    # whitespace would be stored as its value.  By the time EndTag runs,
    # any text belonging to this element has already been handed to Text,
    # so it is safe to disarm here.
    undef $currentLocation;
}
143
# Stream-style handler for character data; the text itself arrives in $_.
# Stores the text under the gs2 key for the metadata name that StartTag
# recorded in $currentLocation (numDocs or buildType), then clears that
# record.  Text seen at any other time is ignored.
sub Text {
    return unless defined $currentLocation;
    return unless $currentLocation =~ /$stringexp/;

    #print $currentLocation;
    my $gs2_key = $nameMap->{$currentLocation};
    $data->{$gs2_key} = $_;
    undef $currentLocation;
}
155
# Debugging aid: prints to the currently selected output handle whichever
# of the parsed values are defined in $data, one per line.
sub Display {

    # [ text printed in front of the value, key in $data, value is a list? ]
    my @report = (
        [ "NumDocs = ",       'numdocs',       0 ],
        [ "BuildType = ",     'buildtype',     0 ],
        [ "IndexMap = ",      'indexmap',      1 ],
        [ "IndexFieldMap = ", 'indexfieldmap', 1 ],
        [ "IndexFields = ",   'indexfields',   1 ],
    );

    foreach my $row (@report) {
        my ($prefix, $key, $is_list) = @$row;
        next unless defined $data->{$key};

        my $value = $is_list ? join(" ", @{$data->{$key}}) : $data->{$key};
        print $prefix . $value . "\n";
    }

}
166
# Registered as the 'Doctype' handler of the parser built in read_cfg_file.
# Aborts the parse when a document type declaration names anything other
# than buildConfig.
#
#   $expat    - the parser object (unused here)
#   $name     - document type name from the declaration
#   $sysid, $pubid, $internal - remaining declaration parts (unused here)
#
# Dies, naming the offending document type, when $name is not buildConfig.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    if (!defined $name || $name ne 'buildConfig') {
        my $found = defined $name ? $name : '(none)';
        die "buildConfigxml::Doctype: expected document type 'buildConfig' but found '$found'";
    }
}
173
# Character-data handler that replaces the one in XML::Parser::Stream.
# The replacement exists to overcome a problem where $expat->{Text} is
# treated as the return value, slowing things down significantly in some
# cases; it therefore appends the new text and returns undef explicitly.
#
#   $_[0] - the parser object; its 'Text' entry accumulates the characters
#   $_[1] - the characters just read (appended in place, never copied)
sub Char {
    my $expat = $_[0];

    if ($] < 5.008) {
        # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
        # NOTE(review): "use bytes" is a lexically scoped pragma, so as
        # written it covers only this otherwise empty block - confirm
        # whether the append below was meant to be inside its scope.
        use bytes;
    }

    $expat->{'Text'} .= $_[1];
    return undef;
}
184
185
186
# Writes one line to a file handle.
#
#   $filehandle - handle to print to; callers in this file pass either a
#                 handle or the name of one as a string
#   $line       - reference to an array of fragments, which are
#                 concatenated without a separator and followed by "\n"
#
# Returns whatever print returns.
sub write_line {
    my ($filehandle, $line) = @_;
    my $text = join("", @{$line});
    print {$filehandle} $text, "\n";
}
191
# Create the buildConfig.xml file for a specific collection.
#
# this sub is called in make_auxiliary_files() in basebuilder.pm
#
#   $buildoutfile - destination file: buildConfig.xml
#   $buildcfg     - hash ref of build options.  Keys read here: buildtype,
#                   numdocs, indexstem, infodbtype, earliestdatestamp,
#                   indexmap / indexfieldmap, stemindexes, maxnumeric,
#                   levelmap, textlevel, searchtype, languagemap,
#                   subcollectionmap.
#   $collectcfg   - hash ref of collection configuration.  Keys read here:
#                   defaultindex, defaultlevel, defaultlanguage, classify.
#
# Dies if the output file cannot be opened or cannot be closed cleanly.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg) = @_;

    # Lexical handle + three-argument open: no global COLCFG handle to
    # clash with, and the file name is never interpreted as an open mode.
    my $out;
    if (!open($out, '>', $buildoutfile)) {
        my $reason = $!;
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile: $reason\n";
        die;
    }

    my $buildtype  = defined $buildcfg->{"buildtype"} ? $buildcfg->{"buildtype"} : "mgpp";
    # An unknown document count is written as an empty element.
    my $numdocs    = defined $buildcfg->{"numdocs"} ? $buildcfg->{"numdocs"} : "";
    my $indexstem  = $buildcfg->{'indexstem'};
    my $infodbtype = $buildcfg->{'infodbtype'};
    my $uses_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    write_line($out, ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);

    # output building metadata to build config file
    write_line($out, ["<metadataList>"]);
    write_line($out, ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
    write_line($out, ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
    foreach my $meta (["indexStem", 'indexstem'],
                      ["infodbType", 'infodbtype'],
                      ["earliestdatestamp", 'earliestdatestamp']) {
        my ($xmlname, $key) = @$meta;
        next unless defined $buildcfg->{$key};
        write_line($out, ["<metadata name=\"$xmlname\">", $buildcfg->{$key}, "</metadata>"]);
    }
    write_line($out, ["</metadataList>"]);

    # Anything that is neither mg nor lucene is served by the MGPP racks.
    my %service_type_for = ("mg" => "MG", "lucene" => "Lucene");
    my $service_type = $service_type_for{$buildtype} || "MGPP";

    # output serviceRackList
    write_line($out, ["<serviceRackList>"]);

    # ---- the search service ----
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
    _write_optional_name_element($out, "indexStem", $indexstem);
    _write_optional_name_element($out, "databaseType", $infodbtype);

    # indexes: mg keeps them in indexmap, everything else in indexfieldmap
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";
    my $defaultindex = "";
    my ($index_pairs, $index_short_of) = ([], {});

    #map {print $_."\n"} keys %$buildcfg;

    if (defined $buildcfg->{$maptype}) {
        ($index_pairs, $index_short_of) = _split_map_entries($buildcfg->{$maptype});
        # the first index is the default ...
        $defaultindex = $index_pairs->[0][1] if @$index_pairs;
        # ... unless the user has assigned a default index
        if (defined $collectcfg->{"defaultindex"}) {
            $defaultindex = $index_short_of->{$collectcfg->{"defaultindex"}};
        }
    } else {
        print STDERR "$maptype not defined\n";
    }

    # for each index in indexList, write them out
    write_line($out, ["<indexList>"]);
    foreach my $pair (@$index_pairs) {
        my $name = $pair->[0];
        write_line($out, ["<index name=\"", $name, "\" ", "shortname=\"", $index_short_of->{$name}, "\" />"]);
    }
    write_line($out, ["</indexList>"]);

    #$defaultindex = "ZZ" if (!$defaultindex); # index allfields by default
    if ($defaultindex) {
        write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        write_line($out, ["<indexOptionList>"]);
        # [ name in the XML, key in $buildcfg, default value ]
        # stemIndexes default 3 is stem and casefold
        foreach my $option (["stemIndexes", 'stemindexes', 3],
                            ["maxnumeric", 'maxnumeric', 4]) {
            my ($xmlname, $key, $value) = @$option;
            my $configured = $buildcfg->{$key};
            # only an all-digit setting overrides the default
            if (defined $configured && $configured =~ /^\d+$/) {
                $value = $configured;
            }
            write_line($out, ["<indexOption name=\"$xmlname\" value=\"", $value, "\" />"]);
        }
        write_line($out, ["</indexOptionList>"]);
    }

    # levelList
    my ($level_pairs, $level_short_of) = ([], {});
    my $default_search_level   = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level       = "Doc";
    if ($uses_levels) {
        if (defined $buildcfg->{'levelmap'}) {
            ($level_pairs, $level_short_of) = _split_map_entries($buildcfg->{'levelmap'});
            if (@$level_pairs) {
                # let default search level follow the first level in the level list;
                # retrieve/database levels may get modified below if text level is defined
                my $first_short = $level_pairs->[0][1];
                $default_search_level   = $first_short;
                $default_retrieve_level = $first_short;
                $default_db_level       = $first_short;
            }
        }

        # The default level assigned by the user overrides the search level
        # only; the retrieve level stays the same.  The override must be
        # conditional: without a user setting (or with one that is not in
        # the level map) the lookup yields undef and defaultLevel would be
        # written with an empty shortname.
        my $user_level = $collectcfg->{"defaultlevel"};
        if (defined $user_level && defined $level_short_of->{$user_level}) {
            $default_search_level = $level_short_of->{$user_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level       = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if ($buildtype ne "mg") {
        write_line($out, ["<levelList>"]);
        foreach my $pair (@$level_pairs) {
            my $name = $pair->[0];
            write_line($out, ["<level name=\"", $name, "\" shortname=\"", $level_short_of->{$name}, "\" />"]);
        }
        write_line($out, ["</levelList>"]);
    }

    if ($uses_levels) {
        # add in defaultLevel as the same level as indexLevelList, making the reading job easier
        write_line($out, ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
        write_line($out, ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);

        # do searchTypeList
        write_line($out, ["<searchTypeList>"]);
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $searchtype (@{$buildcfg->{"searchtype"}}) {
                write_line($out, ["<searchType name=\"", $searchtype, "\" />"]);
            }
        } else {
            write_line($out, ["<searchType name=\"plain\" />"]);
            write_line($out, ["<searchType name=\"form\" />"]);
        }
        write_line($out, ["</searchTypeList>"]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        my ($lang_pairs, $lang_short_of) = _split_map_entries($buildcfg->{"languagemap"});

        write_line($out, ["<indexLanguageList>"]);
        foreach my $pair (@$lang_pairs) {
            write_line($out, ["<indexLanguage name=\"", $pair->[0], "\" shortname=\"", $pair->[1], "\" />"]);
        }
        write_line($out, ["</indexLanguageList>"]);

        # the first language is the default ...
        if (@$lang_pairs) {
            ($default_lang, $default_lang_short) = @{$lang_pairs->[0]};
        }
        # ... unless the user has assigned a default language (as "en", "ru" etc.)
        my $user_lang = $collectcfg->{"defaultlanguage"};
        if (defined $user_lang) {
            $default_lang = $user_lang;
            # Keep the shortname in step with the name; otherwise the
            # element would pair the user's language with the shortname of
            # whichever language happens to be listed first.
            if (defined $lang_short_of->{$user_lang}) {
                $default_lang_short = $lang_short_of->{$user_lang};
            }
        }
        write_line($out, ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared here because the retrieve service needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        my ($subcol_pairs, $subcol_short_of) = _split_map_entries($buildcfg->{'subcollectionmap'});
        $default_subcol = $subcol_pairs->[0][1] if @$subcol_pairs;

        write_line($out, ["<indexSubcollectionList>"]);
        foreach my $pair (@$subcol_pairs) {
            my $name = $pair->[0];
            write_line($out, ["<indexSubcollection name=\"", $name, "\" shortname=\"", $subcol_short_of->{$name}, "\" />"]);
        }
        write_line($out, ["</indexSubcollectionList>"]);
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }

    # close off search service
    write_line($out, ["</serviceRack>"]);

    # ---- the retrieve service ----
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # NOTE(review): this writes the language *name* into the shortname
        # attribute (the search service above writes both) - confirm which
        # value the reader of this element expects.
        write_line($out, ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }
    if ($buildtype eq "mg") {
        write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }
    _write_optional_name_element($out, "indexStem", $indexstem);
    if ($uses_levels) {
        write_line($out, ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
    }
    _write_optional_name_element($out, "databaseType", $infodbtype);

    write_line($out, ["</serviceRack>"]);

    # ---- the browse service ----
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    foreach my $cl (@{ $collectcfg->{"classify"} || [] }) {
        # every classifier consumes a CLn number, phind included
        my $name = "CL$count";
        $count++;

        my $classname = $cl->[0];
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        # the rack is only opened once a non-phind classifier turns up
        if (not $started_classifiers) {
            write_line($out, ["<serviceRack name=\"GS2Browse\">"]);
            _write_optional_name_element($out, "indexStem", $indexstem);
            _write_optional_name_element($out, "databaseType", $infodbtype);
            write_line($out, ["<classifierList>"]);
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            foreach my $j (0 .. $#{$cl}) {
                my $arg = $cl->[$j];
                if ($arg eq "-buttonname") {
                    # a button name wins outright
                    $content = $cl->[$j + 1];
                    last;
                } elsif ($arg eq "-metadata") {
                    # kept only if no -buttonname follows
                    $content = $cl->[$j + 1];
                }
            }
        }
        # an option given as the very last argument has no value
        $content = '' unless defined $content;
        write_line($out, ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
    }
    if ($started_classifiers) {
        # end the classifiers
        write_line($out, ["</classifierList>"]);
        # close off the Browse service
        write_line($out, ["</serviceRack>"]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        write_line($out, ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
    }

    write_line($out, ["</serviceRackList>"]);
    write_line($out, ["</buildConfig>"]);

    # Buffered write errors only surface when the handle is closed.
    close($out)
        or die "buildConfigxml::write_cfg_file couldn't finish writing the build config file $buildoutfile: $!\n";
}

# Splits a list of "name->shortname" strings.
# Returns two references: an array of [name, shortname] pairs in input
# order, and a hash from name to shortname (for a repeated name the last
# entry wins).  An entry without "->" yields an undefined pair.
sub _split_map_entries {
    my ($entries) = @_;

    my @pairs;
    my %short_of;
    foreach my $entry (@$entries) {
        # both groups are greedy, so the split is at the LAST "->"
        my ($name, $short) = $entry =~ /^(.*)\-\>(.*)$/;
        push @pairs, [$name, $short];
        $short_of{$name} = $short;
    }
    return (\@pairs, \%short_of);
}

# Writes <$tag name="$value" /> to $filehandle when $value is defined and
# does nothing otherwise.
sub _write_optional_name_element {
    my ($filehandle, $tag, $value) = @_;

    return unless defined $value;
    write_line($filehandle, ["<$tag name=\"", $value, "\" />"]);
}
540
541
542#########################################################
543
5441;
Note: See TracBrowser for help on using the repository browser.