source: main/trunk/greenstone2/perllib/buildConfigxml.pm@ 21783

Last change on this file since 21783 was 21783, checked in by kjdon, 14 years ago

don't write out OAI servicerack here. do it in collectionConfig as it doesn't depend on building. so don't need to rebuild to turn it off

  • Property svn:keywords set to Author Date Id Revision
File size: 17.9 KB
RevLine 
[15600]1###########################################################################
2#
[20096]3# buildConfigxml.pm --
[15600]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[14741]25
[20105]26# reads in buildConfig.xml
27# Note, only implemented the bits that are currently used, eg by incremental
28# build code.
29# The resulting data is not a full representation on buildConfig.xml.
[15600]30
[20096]31package buildConfigxml;
[20105]32
[15600]33use strict;
34no strict 'refs';
35no strict 'subs';
36
# Compile-time hook that makes the XML::Parser bundled with Greenstone
# available. The bundled copies live in one directory per Perl series, so the
# directory is chosen from the version of the running interpreter
# (Perl 5.6 versus Perl 5.8 and above).
sub BEGIN {
    # Note: $] encodes the version number of perl.
    # perl 5.8.1 or above is the common case, so start from that.
    my $perl_dir = "perl-5.8";

    if ($] < 5.008) {
        # assume perl 5.6
        $perl_dir = "perl-5.6";
    }
    elsif ($] == 5.008) {
        # exactly 5.8.0: still use the 5.8 directory, but complain
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
    }

    # Nothing is added to @INC when GSDLOS says we are on windows.
    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        # Use push to put this on the end, so an existing XML::Parser will be used by default
        my $bundled_dir = "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir";
        push (@INC, $bundled_dir);
    }
}
63
64use XML::Parser;
65
[20105]66
# A mapping hash to resolve name discrepancy between gs2 and gs3: the
# metadata names found in buildConfig.xml on the left, the keys used in the
# hash built by read_cfg_file on the right.
my $nameMap = {
    "numDocs"   => "numdocs",
    "buildType" => "buildtype",
};


# The hash structure which is returned by sub read_cfg_file.
my $data = {};

# Name attribute of the element whose text is still to be stored by sub Text
# (those unique attribute values are used to locate the text within the
# elements).
my $currentLocation = "";

# Pattern, kept as a string, selecting the name attributes whose text is stored.
my $stringexp = '^(buildType|numDocs)$';

# Key of $data that sub StartTag pushes "name->shortname" index entries onto.
my $indexmap_name = "";

# True when sub StartTag should also collect index names in $data->{'indexfields'}.
my $haveindexfields = 0;
[15600]82
# Reads in a collection's build configuration file, buildConfig.xml, into a
# structure which complies with the one used by gs2 (i.e. one read in by
# &cfgread::read_cfg_file). Only the parts filled in by the StartTag and Text
# handlers of this package end up in the result: numdocs, buildtype, and
# indexmap or indexfieldmap/indexfields.
#
#   $filename - path of the file to read; must end in "buildConfig.xml"
#
# Returns undef when $filename is not an existing buildConfig.xml file, an
# empty hash reference when the file cannot be opened, and otherwise a
# reference to the hash that was filled in. Errors raised while parsing
# (including the die in the Doctype handler) are not caught here.
sub read_cfg_file {
    my ($filename) = @_;

    # Start every read from a clean slate. The handlers communicate through
    # file-level variables, so without this a second call would inherit the
    # index bookkeeping (and any pending metadata name) of the previous file.
    $data = {};
    $currentLocation = "";
    $indexmap_name = "";
    $haveindexfields = 0;

    if ($filename !~ /buildConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # Create the XML::Parser object for parsing buildConfig.xml. The
    # StartTag, EndTag and Text subs of this package are picked up by the
    # Stream style; Char and Doctype are installed explicitly.
    my %parser_args = ('Style' => 'Stream',
                       'Handlers' => {'Char' => \&Char,
                                      'Doctype' => \&Doctype
                                     });
    if ($] >= 5.008) {
        # Perl 5.8 and above (Perl 5.6 gets no explicit protocol encoding)
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # Check that the file can actually be opened, so that an unreadable file
    # is reported here and yields the empty result instead of reaching the
    # parser. The handle is only a probe: the parser opens the file itself.
    my $probe;
    if (!open ($probe, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
        return $data;
    }
    close ($probe);

    $parser->parsefile ($filename);

    #&Display;
    return $data;
}
122
# Start-tag handler used by XML::Parser's Stream style while read_cfg_file
# parses buildConfig.xml.
#
#   $expat   - the parser object (not used here)
#   $element - name of the element being opened
#
# The attributes of the element are read from the global hash %_, which the
# Stream style fills in before calling this sub. The cases below are mutually
# exclusive, hence the if/elsif chain.
sub StartTag {
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    # handling block metadataList: remember which metadata this is;
    # the value will be retrieved later in Text sub
    if (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
    }

    # handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene)
    elsif ($element eq "indexList") {
        # set up the data arrays
        # this assumes that the build type has been read already, which is
        # currently the order we save the file in.
        my $buildtype = $data->{'buildtype'};
        if (defined $buildtype && $buildtype eq "mg") {
            $indexmap_name = "indexmap";
            # mg has no index fields; make sure a flag left over from an
            # earlier parse does not make us collect them
            $haveindexfields = 0;
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }

    # Only a real <index> element is an index. Matching any element whose
    # name merely contains "index" would also pick up indexLanguage and
    # indexSubcollection, which carry name/shortname attributes too.
    elsif ($element eq "index") {
        # store each index in the map (only once an indexList has told us
        # which map to use)
        if ($indexmap_name ne "" && defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, $name . "->" . $shortname;
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
175
# End-tag handler used by XML::Parser's Stream style while read_cfg_file
# parses buildConfig.xml.
#
#   $expat   - the parser object (not used here)
#   $element - name of the element being closed (not used here)
sub EndTag {
    my ($expat, $element) = @_;

    # Stop waiting for a metadata value once its element is closed. The
    # Stream style delivers an element's text (if it has any) before this
    # handler runs, so a value that was present has already been stored. For
    # an empty element, e.g. <metadata name="numDocs"></metadata>, no text is
    # delivered at all, and without this reset the next run of text (the
    # whitespace before the following element) would be stored as the value.
    $currentLocation = "";
}
179
# Text handler used by XML::Parser's Stream style; the text is passed in $_.
# Handles block metadataList (numDocs, buildType): when StartTag has noted
# one of those names in $currentLocation, the text is stored in $data under
# the corresponding gs2 key from $nameMap and the noted name is dropped again.
sub Text {
    if (defined $currentLocation && $currentLocation =~ /$stringexp/) {
        #print $currentLocation;
        my $gs2_key = $nameMap->{$currentLocation};
        $data->{$gs2_key} = $_;
        undef $currentLocation;
    }
}
[15619]191
# This sub is for debugging purposes: it prints whichever parts of the
# structure built by read_cfg_file are defined, one "Label = value" line per
# part, with list values joined by single spaces.
sub Display {

    # single-valued entries
    foreach my $entry (["NumDocs", 'numdocs'], ["BuildType", 'buildtype']) {
        my ($label, $key) = @$entry;
        next if (!defined $data->{$key});
        print $label . " = " . $data->{$key} . "\n";
    }

    # list-valued entries
    foreach my $entry (["IndexMap", 'indexmap'],
                       ["IndexFieldMap", 'indexfieldmap'],
                       ["IndexFields", 'indexfields']) {
        my ($label, $key) = @$entry;
        next if (!defined $data->{$key});
        print $label . " = " . join(" ", @{$data->{$key}}) . "\n";
    }

}
[20105]202
# Doctype handler installed on the XML parser by read_cfg_file. It aborts the
# parse, by dying, when the document declares a document type whose name is
# not "buildConfig".
# (is this actually used??)
#
#   $expat    - the parser object (not used here)
#   $name     - name given in the DOCTYPE declaration
#   $sysid, $pubid, $internal - remaining DOCTYPE details (not used here)
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    if (!defined $name || $name !~ /^buildConfig$/) {
        # say what was wrong rather than dying without any message
        my $found = defined $name ? $name : "";
        die "buildConfigxml::Doctype: unexpected document type '$found', expected 'buildConfig'";
    }
}
209
# Character-data handler installed on the XML parser by read_cfg_file.
# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases: the new characters are appended to
# the text collected so far and undef is returned explicitly.
#
#   $expat - the parser object; its 'Text' entry accumulates the characters
#   $chars - the character data just read
sub Char {
    my ($expat, $chars) = @_;

    if ($] < 5.008) {
        # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
        # NOTE(review): "use bytes" is a lexically scoped pragma, so as written
        # it only covers this otherwise empty block, not the append below -
        # confirm whether that is what was intended.
        use bytes;
    }

    $expat->{'Text'} .= $chars;
    return undef;
}
[15619]220
[15600]221
222
# Writes one line of output: the strings in @$line are joined without any
# separator, and a newline is printed after them.
#
#   $filehandle - what to print to (a filehandle, or the name of one)
#   $line       - reference to an array holding the pieces of the line
sub write_line {
    my ($filehandle, $line) = @_;

    my $text = join ("", @{$line});
    print {$filehandle} $text, "\n";
}
227
# Splits a list of "name->shortname" strings (the form used by the index,
# level, language and subcollection maps of the build configuration) into
# [name, shortname] pairs, keeping their order. As before, the split is made
# at the last "->" of an entry. Entries without "->" are skipped, and an
# undefined list gives no pairs.
sub _parse_map_entries {
    my ($entries) = @_;

    my @pairs = ();
    return @pairs if (!defined $entries);

    foreach my $entry (@$entries) {
        my ($name, $shortname) = $entry =~ /^(.*)\-\>(.*)$/;
        next if (!defined $name);
        push @pairs, [$name, $shortname];
    }
    return @pairs;
}

# Create the buildConfig.xml file for a specific collection
#
# this sub is called in make_auxiliary_files() in basebuilder.pm
# the received args:
#   $buildoutfile - destination file: buildConfig.xml
#   $buildcfg     - hash reference of build options; the keys read here are
#                   buildtype, numdocs, indexstem, infodbtype, indexmap or
#                   indexfieldmap, stemindexes, maxnumeric, levelmap,
#                   textlevel, searchtype, languagemap and subcollectionmap
#   $collectcfg   - hash reference of the collection configuration; the keys
#                   read here are defaultindex, defaultlevel, defaultlanguage
#                   and classify
#   $disable_OAI  - accepted for the benefit of existing callers; not used
#
# Dies if the destination file cannot be opened. Returns the result of
# closing the file (true on success).
#
# NOTE(review): values are written into the XML as they are, without any
# escaping - confirm that none of them can contain &, < or double quotes.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_;

    my $out;
    if (!open ($out, '>', $buildoutfile)) {
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die;
    }

    write_line($out, ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);

    # output building metadata to build config file
    my $buildtype = defined $buildcfg->{"buildtype"} ? $buildcfg->{"buildtype"} : "mgpp";
    # an unknown number of documents is written as an empty value
    my $numdocs = defined $buildcfg->{"numdocs"} ? $buildcfg->{"numdocs"} : "";
    my $indexstem = $buildcfg->{'indexstem'};
    my $infodbtype = $buildcfg->{'infodbtype'};

    write_line($out, ["<metadataList>"]);
    write_line($out, ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
    write_line($out, ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
    if (defined $indexstem) {
        write_line($out, ["<metadata name=\"indexStem\">", $indexstem, "</metadata>"]);
    }
    if (defined $infodbtype) {
        write_line($out, ["<metadata name=\"infodbType\">", $infodbtype, "</metadata>"]);
    }
    write_line($out, ["</metadataList>"]);

    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }
    my $has_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    # output serviceRackList
    write_line($out, ["<serviceRackList>"]);

    # do the search service
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
    if (defined $indexstem) {
        write_line($out, ["<indexStem name=\"", $indexstem, "\" />"]);
    }
    if (defined $infodbtype) {
        write_line($out, ["<databaseType name=\"", $infodbtype, "\" />"]);
    }

    # indexes
    my %index_short = ();   # maps index name to shortname
    my @index_names = ();   # keeps the order for indexes
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";

    if (defined $buildcfg->{$maptype}) {
        my @index_pairs = _parse_map_entries($buildcfg->{$maptype});
        foreach my $pair (@index_pairs) {
            my ($name, $shortname) = @$pair;
            $index_short{$name} = $shortname;
            push @index_names, $name;
        }
        # the first index is the default ...
        if (@index_pairs) {
            $defaultindex = $index_pairs[0]->[1];
        }
        # ... now if the user has assigned a default index, we use it
        # (provided it is one of the indexes that were built; an unknown name
        # no longer blanks out the default)
        my $user_index = $collectcfg->{"defaultindex"};
        if (defined $user_index && defined $index_short{$user_index}) {
            $defaultindex = $index_short{$user_index};
        }
    } else {
        print STDERR "$maptype not defined";
    }

    # for each index in indexList, write them out
    write_line($out, ["<indexList>"]);
    foreach my $name (@index_names) {
        write_line($out, ["<index name=\"", $name, "\" ", "shortname=\"", $index_short{$name}, "\" />"]);
    }
    write_line($out, ["</indexList>"]);

    write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        write_line($out, ["<indexOptionList>"]);

        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        write_line($out, ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        write_line($out, ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);

        write_line($out, ["</indexOptionList>"]);
    }

    # levelList
    my %level_short = ();
    my @level_names = ();
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    if ($has_levels) {
        my @level_pairs = _parse_map_entries($buildcfg->{'levelmap'});
        foreach my $pair (@level_pairs) {
            my ($name, $shortname) = @$pair;
            $level_short{$name} = $shortname;
            push @level_names, $name;
        }
        if (@level_pairs) {
            # let default search level follow the first level in the level list
            $default_search_level = $level_pairs[0]->[1];
            # retrieve/database levels may get modified later if text level is defined
            $default_retrieve_level = $level_pairs[0]->[1];
            $default_db_level = $level_pairs[0]->[1];
        }

        # The default level assigned by the user is no longer ignored
        # [Shaoqun], but the retrieve level stays the same. The user's level
        # is only taken when it is set and names a level that was built:
        # looking it up unconditionally replaced the default found above by
        # an undefined value whenever it was missing or unknown.
        my $user_level = $collectcfg->{"defaultlevel"};
        if (defined $user_level && defined $level_short{$user_level}) {
            $default_search_level = $level_short{$user_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if ($buildtype ne "mg") {
        write_line($out, ["<levelList>"]);
        foreach my $name (@level_names) {
            write_line($out, ["<level name=\"", $name, "\" shortname=\"", $level_short{$name}, "\" />"]);
        }
        write_line($out, ["</levelList>"]);
    }

    if ($has_levels) {
        # add in defaultLevel as the same level as indexLevelList, making the reading job easier
        write_line($out, ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
        write_line($out, ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);

        # do searchTypeList
        write_line($out, ["<searchTypeList>"]);
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $searchtype (@{$buildcfg->{"searchtype"}}) {
                write_line($out, ["<searchType name=\"", $searchtype, "\" />"]);
            }
        } else {
            write_line($out, ["<searchType name=\"plain\" />"]);
            write_line($out, ["<searchType name=\"form\" />"]);
        }
        write_line($out, ["</searchTypeList>"]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        write_line($out, ["<indexLanguageList>"]);

        my %lang_short = ();
        my @lang_pairs = _parse_map_entries($buildcfg->{"languagemap"});
        foreach my $pair (@lang_pairs) {
            my ($name, $shortname) = @$pair;
            $lang_short{$name} = $shortname;
            write_line($out, ["<indexLanguage name=\"", $name, "\" shortname=\"", $shortname, "\" />"]);
        }
        if (@lang_pairs) {
            # the first language is the default
            $default_lang = $lang_pairs[0]->[0];       #name
            $default_lang_short = $lang_pairs[0]->[1]; #shortname
        }

        write_line($out, ["</indexLanguageList>"]);

        # now if the user has assigned a default language (as "en", "ru" etc.)
        my $user_lang = $collectcfg->{"defaultlanguage"};
        if (defined $user_lang) {
            $default_lang = $user_lang;
            # keep the shortname in step with the name; it used to stay that
            # of the first language in the map
            if (defined $lang_short{$user_lang}) {
                $default_lang_short = $lang_short{$user_lang};
            }
        }
        write_line($out, ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared out here as the retrieve service needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<indexSubcollectionList>"]);

        my %subcol_short = ();
        my @subcol_names = ();
        my @subcol_pairs = _parse_map_entries($buildcfg->{'subcollectionmap'});
        foreach my $pair (@subcol_pairs) {
            my ($name, $shortname) = @$pair;
            $subcol_short{$name} = $shortname;
            push @subcol_names, $name;
        }
        if (@subcol_pairs) {
            # the first subcollection is the default
            $default_subcol = $subcol_pairs[0]->[1];
        }
        foreach my $name (@subcol_names) {
            write_line($out, ["<indexSubcollection name=\"", $name, "\" shortname=\"", $subcol_short{$name}, "\" />"]);
        }

        write_line($out, ["</indexSubcollectionList>"]);
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }

    # close off search service
    write_line($out, ["</serviceRack>"]);

    # do the retrieve service
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # NOTE(review): this writes the language name into the shortname
        # attribute, unlike the search service above, which writes the
        # shortname there. Left as it was - confirm what the reader of this
        # file expects.
        write_line($out, ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }
    if ($buildtype eq "mg") {
        write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }

    if (defined $indexstem) {
        write_line($out, ["<indexStem name=\"", $indexstem, "\" />"]);
    }
    if ($has_levels) {
        write_line($out, ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
    }
    if (defined $infodbtype) {
        write_line($out, ["<databaseType name=\"", $infodbtype, "\" />"]);
    }

    write_line($out, ["</serviceRack>"]);

    # do the browse service
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    my $classifiers = $collectcfg->{"classify"};
    $classifiers = [] if (!defined $classifiers);
    foreach my $cl (@$classifiers) {
        # every classifier, phind included, uses up a CL number
        my $name = "CL$count";
        $count++;

        my $classname = $cl->[0];
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        # the browse service is only opened once there is a classifier to put in it
        if (!$started_classifiers) {
            write_line($out, ["<serviceRack name=\"GS2Browse\">"]);
            if (defined $indexstem) {
                write_line($out, ["<indexStem name=\"", $indexstem, "\" />"]);
            }
            if (defined $infodbtype) {
                write_line($out, ["<databaseType name=\"", $infodbtype, "\" />"]);
            }
            write_line($out, ["<classifierList>"]);
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            for (my $j = 0; $j < scalar(@$cl); $j++) {
                my $arg = $cl->[$j];
                if ($arg eq "-buttonname") {
                    # a button name wins outright
                    $content = $cl->[$j+1];
                    last;
                } elsif ($arg eq "-metadata") {
                    # keep looking, in case a button name follows
                    $content = $cl->[$j+1];
                }
            }
        }
        # an option given without a value leaves the content empty
        $content = '' if (!defined $content);
        write_line($out, ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
    }
    if ($started_classifiers) {
        # end the classifiers
        write_line($out, ["</classifierList>"]);
        # close off the Browse service
        write_line($out, ["</serviceRack>"]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        write_line($out, ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
    }

    write_line($out, ["</serviceRackList>"]);
    write_line($out, ["</buildConfig>"]);

    # errors in buffered output only show up when the file is closed
    my $closed = close ($out);
    if (!$closed) {
        print STDERR "buildConfigxml::write_cfg_file couldn't finish writing the build config file $buildoutfile: $!\n";
    }
    return $closed;
}
569
570
571#########################################################
572
5731;
Note: See TracBrowser for help on using the repository browser.