source: main/trunk/greenstone2/perllib/buildConfigxml.pm@ 23895

Last change on this file since 23895 was 23895, checked in by sjm84, 13 years ago

Modified several Perl files to merge the locations where XML::Parser checked for the current version of perl into one location. Also tidied up several locations where a difference was specified between 5.6 and 5.8+ to do with ProtocolEncoding being used to initialise an XML::Parser. Given the recent "

  • Property svn:keywords set to Author Date Id Revision
File size: 17.1 KB
RevLine 
[15600]1###########################################################################
2#
[20096]3# buildConfigxml.pm --
[15600]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[14741]25
# Reads in buildConfig.xml.
# Note: only the parts that are currently used (e.g. by the incremental
# build code) have been implemented.
# The resulting data is not a full representation of buildConfig.xml.
[15600]30
[20096]31package buildConfigxml;
[20105]32
[15600]33use strict;
34no strict 'refs';
35no strict 'subs';
36
[23895]37use XMLParser;
[15600]38
39
# Translates the metadata names used in buildConfig.xml (gs3 spelling) into
# the key names used in the hash returned by read_cfg_file (gs2 spelling).
my $nameMap = {
    "numDocs"   => "numdocs",
    "buildType" => "buildtype",
};

# The hash structure that sub read_cfg_file fills in and returns.
my $data = {};

# Value of the 'name' attribute of the element whose text is awaited. It is
# compared against $stringexp to decide whether that text gets stored.
my $currentLocation = "";
my $stringexp = '^(buildType|numDocs)$';

# Key of $data that <index> entries are appended to, and a flag saying
# whether the index names are additionally collected under 'indexfields'.
my $indexmap_name = "";
my $haveindexfields = 0;
[15600]55
# Reads a buildConfig.xml file into a hash that uses gs2-style key names
# (numdocs, buildtype, indexmap or indexfieldmap, indexfields).
#
#   $filename - path of the file to read; its name must end in
#               "buildConfig.xml".
#
# Returns undef when $filename does not end in buildConfig.xml or is not a
# plain file. Otherwise returns a reference to the populated hash, which is
# left empty if the file cannot be opened for reading. The Doctype handler
# dies when the document declares a DOCTYPE other than buildConfig, and
# XML::Parser itself dies on malformed XML.
sub read_cfg_file {
    my ($filename) = @_;

    # The handlers communicate through file-level variables. Start every
    # parse from a clean slate so that nothing left behind by an earlier
    # call (e.g. $haveindexfields set while reading an mgpp collection)
    # leaks into this one.
    $data            = {};
    $currentLocation = "";
    $indexmap_name   = "";
    $haveindexfields = 0;

    if ($filename !~ /buildConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # Removed ProtocolEncoding (see MetadataXMLPlugin for details)

    # Stream style: XML::Parser calls StartTag, EndTag and Text in the
    # package named by 'Pkg'; Char and Doctype are installed explicitly.
    my $parser = XML::Parser->new('Style' => 'Stream',
                                  'Pkg' => 'buildConfigxml',
                                  'Handlers' => {'Char' => \&Char,
                                                 'Doctype' => \&Doctype
                                  });

    # Probe readability first so that an unreadable file produces a message
    # and an empty result. The parser opens the file itself, so the probe
    # handle is closed again straight away.
    my $probe;
    if (!open ($probe, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
    } else {
        close ($probe);
        $parser->parsefile ($filename);
    }

    #&Display;
    return $data;
}
86
# Start-tag handler for the Stream-style parser created in read_cfg_file.
# The element's attributes are read from %_ (that is how XML::Parser's
# Stream style passes them to StartTag).
#
#   $expat   - the parser object (unused)
#   $element - name of the element being opened
#
# The branches are mutually exclusive, hence the if/elsif chain:
#   * an element whose 'name' attribute is numDocs or buildType arms
#     $currentLocation so that sub Text stores the element's text;
#   * <indexList> selects which map the following <index> elements go into;
#   * <index> appends "name->shortname" to that map.
sub StartTag {
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    #@ handling block metadataList
    if (defined $name and $name =~ /$stringexp/) {
        # the value will be retrieved later in Text sub
        $currentLocation = $name;
    }

    #@ handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene)
    elsif ($element eq 'indexList') {
        # set up the data arrays
        # this assumes that the build type has been read already, which is
        # currently the order we save the file in.
        my $buildtype = $data->{'buildtype'};
        if (defined $buildtype && $buildtype eq "mg") {
            $indexmap_name = "indexmap";
            # Clear the flag explicitly: it is file-level state and may
            # still be set from a previously parsed mgpp/lucene file.
            $haveindexfields = 0;
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }

    # Exact match only. A substring test on "index" would also accept
    # <indexLanguage> and <indexSubcollection>, which carry name/shortname
    # attributes too and would end up in the index map.
    elsif ($element eq 'index') {
        # store each index in the map; ignore an <index> that turns up
        # before any <indexList> has chosen the map to fill
        if ($indexmap_name ne "" && defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
139
# End-tag handler for the Stream-style parser.
#
#   $expat   - the parser object (unused)
#   $element - name of the element being closed (unused)
#
# Disarms $currentLocation. The Stream style only calls Text for non-empty
# text, so an empty element such as <metadata name="numDocs"></metadata>
# would otherwise leave the location armed and the whitespace following the
# element would be stored as its value.
sub EndTag {
    my ($expat, $element) = @_;

    undef $currentLocation;
}
143
# Text handler for the Stream-style parser; the text itself arrives in $_.
# When StartTag has armed $currentLocation with numDocs or buildType, the
# text is stored in $data under the gs2 key given by $nameMap and the
# location is disarmed again.
sub Text {
    return unless defined $currentLocation;

    #@ Handling block metadataList(numDocs, buildType)
    return unless $currentLocation =~ /$stringexp/;

    my $gs2_key = $nameMap->{$currentLocation};
    $data->{$gs2_key} = $_;
    undef $currentLocation;
}
[15619]155
# Debugging aid: prints whichever parts of $data are currently defined.
sub Display {

    # plain values
    foreach my $pair (["NumDocs", 'numdocs'], ["BuildType", 'buildtype']) {
        my ($label, $key) = @$pair;
        next unless defined $data->{$key};
        print $label . " = " . $data->{$key} . "\n";
    }

    # array references, printed space-separated
    foreach my $pair (["IndexMap", 'indexmap'],
                      ["IndexFieldMap", 'indexfieldmap'],
                      ["IndexFields", 'indexfields']) {
        my ($label, $key) = @$pair;
        next unless defined $data->{$key};
        print $label . " = " . join(" ", @{$data->{$key}}) . "\n";
    }

}
[20105]166
# Registered as the parser's 'Doctype' handler in read_cfg_file. Aborts the
# parse unless the declared document type name is buildConfig.
# (write_cfg_file in this module does not emit a DOCTYPE declaration.)
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    unless ($name =~ /^buildConfig$/) {
        die;
    }
}
173
# Replacement for the Char handler of XML::Parser::Stream. It appends the
# incoming character data to the parser's 'Text' buffer and returns undef
# explicitly, which avoids the slowdown seen when the (growing) buffer
# itself was treated as the return value.
sub Char {
    my ($expat, $chars) = @_;

    if ($] < 5.008) {
        # Intended to prevent encoding issues with XML::Parser 2.31+ and
        # Perl 5.6. Note that the pragma is lexically scoped to this block.
        use bytes;
    }

    $expat->{'Text'} .= $chars;
    return undef;
}
[15619]184
[15600]185
186
# Writes one line to $filehandle: the elements of the array referenced by
# $line concatenated without a separator, followed by a newline.
sub write_line {
    my ($filehandle, $line) = @_;

    my $text = join ("", @{$line});
    print {$filehandle} $text, "\n";
}
191
# Create the buildConfig.xml file for a specific collection.
#
# This sub is called in make_auxiliary_files() in basebuilder.pm.
#   $buildoutfile - destination file: buildConfig.xml
#   $buildcfg     - hash ref with all build options. Keys read here:
#                   buildtype, numdocs, indexstem, infodbtype, indexmap or
#                   indexfieldmap, stemindexes, maxnumeric, levelmap,
#                   textlevel, searchtype, languagemap, subcollectionmap.
#                   The *map entries are strings of the form "name->shortname".
#   $collectcfg   - hash ref with the collection configuration. Keys read
#                   here: defaultindex, defaultlevel, defaultlanguage, classify.
#
# Dies if the output file cannot be opened or cannot be closed cleanly.
# Returns 1 on success.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg) = @_;

    my $out;
    if (!open ($out, '>', $buildoutfile)) {
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die;
    }

    # Every call writes one line consisting of the concatenated arguments.
    my $emit = sub { write_line($out, [@_]); };

    # indexStem and databaseType are repeated in each service rack.
    my $emit_index_stem = sub {
        if (defined $buildcfg->{'indexstem'}) {
            $emit->("<indexStem name=\"", $buildcfg->{'indexstem'}, "\" />");
        }
    };
    my $emit_database_type = sub {
        if (defined $buildcfg->{'infodbtype'}) {
            $emit->("<databaseType name=\"", $buildcfg->{'infodbtype'}, "\" />");
        }
    };

    $emit->("<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">");

    # output building metadata to build config file
    my $buildtype = "mgpp";
    if (defined $buildcfg->{"buildtype"}) {
        $buildtype = $buildcfg->{"buildtype"};
    }
    # stays undef when not supplied, which yields an empty numDocs element
    my $numdocs = $buildcfg->{"numdocs"};

    $emit->("<metadataList>");
    $emit->("<metadata name=\"numDocs\">", $numdocs, "</metadata>");
    $emit->("<metadata name=\"buildType\">", $buildtype, "</metadata>");
    if (defined $buildcfg->{'indexstem'}) {
        $emit->("<metadata name=\"indexStem\">", $buildcfg->{"indexstem"}, "</metadata>");
    }
    if (defined $buildcfg->{'infodbtype'}) {
        $emit->("<metadata name=\"infodbType\">", $buildcfg->{"infodbtype"}, "</metadata>");
    }
    $emit->("</metadataList>");

    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # level-related elements are only written for these build types
    my $uses_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    # output serviceRackList
    $emit->("<serviceRackList>");

    # do the search service
    $emit->("<serviceRack name=\"GS2", $service_type, "Search\">");
    $emit_index_stem->();
    $emit_database_type->();

    #indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();

    my $defaultindex = "";
    my $first = 1;
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";

    if (defined $buildcfg->{$maptype}) {
        foreach my $entry (@{$buildcfg->{$maptype}}) {
            my ($k, $v) = $entry =~ /^(.*)\-\>(.*)$/;
            $indexmap->{$k} = $v;
            push @indexlist, $k;
            if ($first) {
                # the first index is the default unless the user chose one
                $defaultindex = $v;
                $first = 0;
            }
        }
        # now if the user has assigned a default index, we use it
        if (defined $collectcfg->{"defaultindex"}) {
            $defaultindex = $indexmap->{$collectcfg->{"defaultindex"}};
        }
    } else {
        print STDERR "$maptype not defined\n";
    }

    #for each index in indexList, write them out
    $emit->("<indexList>");
    foreach my $i (@indexlist) {
        $emit->("<index name=\"", $i, "\" ", "shortname=\"", $indexmap->{$i}, "\" />");
    }
    $emit->("</indexList>");

    #$defaultindex = "ZZ" if (!$defaultindex); # index allfields by default
    if ($defaultindex) {
        $emit->("<defaultIndex shortname=\"", $defaultindex, "\" />");
    }

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        $emit->("<indexOptionList>");

        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $emit->("<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />");

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $emit->("<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />");

        $emit->("</indexOptionList>");
    }

    # levelList
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    $first = 1;
    if ($uses_levels) {
        if (defined $buildcfg->{'levelmap'}) {
            foreach my $entry (@{$buildcfg->{'levelmap'}}) {
                my ($key, $val) = $entry =~ /^(.*)\-\>(.*)$/;
                $levelmap->{$key} = $val;
                push @levellist, $key;
                if ($first) {
                    # let default search level follow the first level in the level list
                    $default_search_level = $val;
                    # retrieve/database levels may get modified later if text level is defined
                    $default_retrieve_level = $val;
                    $default_db_level = $val;
                    $first = 0;
                }
            }
        }

        # The default level assigned by the user overrides the search level
        # (the retrieve level stays the same). Only override when the user
        # actually set one and it names a built level; an unconditional
        # lookup would replace the default with undef.
        my $wanted_level = $collectcfg->{"defaultlevel"};
        if (defined $wanted_level && defined $levelmap->{$wanted_level}) {
            $default_search_level = $levelmap->{$wanted_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    #for each level in levelList, write them out
    if ($buildtype ne "mg") {
        $emit->("<levelList>");
        foreach my $lv (@levellist) {
            $emit->("<level name=\"", $lv, "\" shortname=\"", $levelmap->{$lv}, "\" />");
        }
        $emit->("</levelList>");
    }

    if ($uses_levels) {
        # add in defaultLevel as the same level as indexLevelList, making the reading job easier
        $emit->("<defaultLevel shortname=\"", $default_search_level, "\" />");
        $emit->("<defaultDBLevel shortname=\"", $default_db_level, "\" />");

        # do searchTypeList
        $emit->("<searchTypeList>");
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $s (@{$buildcfg->{"searchtype"}}) {
                $emit->("<searchType name=\"", $s, "\" />");
            }
        } else {
            $emit->("<searchType name=\"plain\" />");
            $emit->("<searchType name=\"form\" />");
        }
        $emit->("</searchTypeList>");
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    $first = 1;
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        $emit->("<indexLanguageList>");

        foreach my $entry (@{$buildcfg->{"languagemap"}}) {
            my ($k, $v) = $entry =~ /^(.*)\-\>(.*)$/;

            $emit->("<indexLanguage name=\"", $k, "\" shortname=\"", $v, "\" />");
            if ($first) {
                $default_lang = $k; #name
                $default_lang_short = $v; #shortname
                $first = 0;
            }
        }

        $emit->("</indexLanguageList>");
        # now if the user has assigned a default language (as "en", "ru" etc.)
        # NOTE(review): only the name is overridden here; the shortname
        # stays that of the first language - confirm this is intended.
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
        }
        $emit->("<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />");
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared in sub scope: the retrieve service below needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        $emit->("<indexSubcollectionList>");
        my $subcolmap = {};
        my @subcollist = ();
        $first = 1;
        foreach my $entry (@{$buildcfg->{'subcollectionmap'}}) {
            my ($k, $v) = $entry =~ /^(.*)\-\>(.*)$/;
            $subcolmap->{$k} = $v;
            push @subcollist, $k;
            if ($first) {
                $default_subcol = $v;
                $first = 0;
            }
        }
        foreach my $sl (@subcollist) {
            $emit->("<indexSubcollection name=\"", $sl, "\" shortname=\"", $subcolmap->{$sl}, "\" />");
        }

        $emit->("</indexSubcollectionList>");
        $emit->("<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />");
    }

    # close off search service
    $emit->("</serviceRack>");

    # do the retrieve service
    $emit->("<serviceRack name=\"GS2", $service_type, "Retrieve\">");

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # NOTE(review): the shortname attribute is filled with the language
        # name ($default_lang), not $default_lang_short - confirm.
        $emit->("<defaultIndexLanguage shortname=\"", $default_lang, "\" />");
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        $emit->("<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />");
    }
    if ($buildtype eq "mg") {
        $emit->("<defaultIndex shortname=\"", $defaultindex, "\" />");
    }

    $emit_index_stem->();
    if ($uses_levels) {
        $emit->("<defaultLevel shortname=\"", $default_retrieve_level, "\" />");
    }
    $emit_database_type->();

    $emit->("</serviceRack>");

    # do the browse service
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    my $classifiers = $collectcfg->{"classify"};
    foreach my $cl (@$classifiers) {
        # phind still consumes a CL number even though it is not listed
        my $name = "CL$count";
        $count++;
        my $classname = $cl->[0];
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        # the Browse rack is opened lazily, on the first real classifier
        if (not $started_classifiers) {
            $emit->("<serviceRack name=\"GS2Browse\">");
            $emit_index_stem->();
            $emit_database_type->();
            $emit->("<classifierList>");
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            foreach my $j (0 .. $#{$cl}) {
                my $arg = $cl->[$j];
                if ($arg eq "-buttonname") {
                    $content = $cl->[$j+1];
                    last;
                } elsif ($arg eq "-metadata") {
                    # keep scanning: a later -buttonname takes precedence
                    $content = $cl->[$j+1];
                }
            }
        }
        $emit->("<classifier name=\"", $name, "\" content=\"", $content, "\" />");
    }
    if ($started_classifiers) {
        # end the classifiers
        $emit->("</classifierList>");
        # close off the Browse service
        $emit->("</serviceRack>");
    }

    # the phind classifier is a separate service
    if ($phind) {
        $emit->("<serviceRack name=\"PhindPhraseBrowse\" />");
    }

    $emit->("</serviceRackList>");
    $emit->("</buildConfig>");

    # Buffered write errors (e.g. a full disk) only surface at close time;
    # treat them like a failure to open rather than leaving a truncated file
    # behind unnoticed.
    if (!close ($out)) {
        print STDERR "buildConfigxml::write_cfg_file couldn't finish writing the build config file $buildoutfile\n";
        die;
    }

    return 1;
}
536
537
538#########################################################
539
5401;
Note: See TracBrowser for help on using the repository browser.