source: main/trunk/greenstone2/perllib/buildConfigxml.pm@ 21439

Last change on this file since 21439 was 21439, checked in by davidb, 14 years ago

Support for 'infodbtype' added

  • Property svn:keywords set to Author Date Id Revision
File size: 18.6 KB
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in buildConfig.xml
27# Note, only implemented the bits that are currently used, eg by incremental
28# build code.
29# The resulting data is not a full representation of buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
# Compile-time hook that makes sure a suitable XML::Parser can be found for
# the running Perl. A version-specific directory under
# $GSDLHOME/perllib/cpan (perl-5.6 or perl-5.8) is added to @INC on
# non-Windows systems.
sub BEGIN {
    # $] holds the numeric version of the running perl; 5.008 is Perl 5.8.0.
    if ($] == 5.008) {
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
    }

    # Anything older than 5.8 is assumed to be Perl 5.6.
    my $cpan_subdir = ($] < 5.008) ? "perl-5.6" : "perl-5.8";

    # The directory goes on the END of @INC, so an XML::Parser that is
    # already installed elsewhere is still picked up first.
    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        push(@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$cpan_subdir");
    }
}
63
64use XML::Parser;
65
66
# Maps the metadata names used in buildConfig.xml (gs3 spelling) onto the
# key names used in the gs2-style hash that read_cfg_file returns.
my $nameMap = {
    'numDocs'   => 'numdocs',
    'buildType' => 'buildtype',
};

# The hash that is filled in while parsing and returned by read_cfg_file.
my $data = {};

# Name attribute of the metadata element whose text is still to be read:
# set in StartTag, consumed (and undefined again) in Text.
my $currentLocation = '';
# Pattern text recognising the metadata names that are extracted.
my $stringexp = '^(buildType|numDocs)$';

# Key of $data that "name->shortname" index entries are appended to.
my $indexmap_name = '';
# True when index names are additionally collected in $data->{'indexfields'}.
my $haveindexfields = 0;
82
# Reads a collection's buildConfig.xml into a hash that follows the layout
# used by gs2 (i.e. the kind of structure &cfgread::read_cfg_file builds).
# Only the parts handled by StartTag and Text are extracted: numdocs,
# buildtype, and the index map / index fields.
#
# Argument: $filename - path of the file; its name must end in
#                       "buildConfig.xml".
# Returns:  a reference to the resulting hash (left empty when the file
#           cannot be opened), or undef when $filename is not an existing
#           buildConfig.xml file.
sub read_cfg_file {
    my ($filename) = @_;

    # Start from a clean slate: the handlers keep their state in file-level
    # variables, so flags left behind by an earlier parse must not leak
    # into this one.
    $data = {};
    $currentLocation = "";
    $indexmap_name = "";
    $haveindexfields = 0;

    if ($filename !~ /buildConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # Stream style dispatches to StartTag/EndTag/Text in this package;
    # Char and Doctype are supplied explicitly.
    my %parser_args = ('Style' => 'Stream',
                       'Handlers' => {'Char' => \&Char,
                                      'Doctype' => \&Doctype
                                     });
    if ($] >= 5.008) {
        # Perl 5.8 and later: state the encoding explicitly
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # Probe readability first so that an unreadable file produces a
    # message and an empty result instead of an exception from the parser.
    my $probe_fh;
    if (!open($probe_fh, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
        return $data;
    }
    close($probe_fh);

    $parser->parsefile($filename);

    #&Display;
    return $data;
}
122
# Stream-style start-tag handler, called for every opening tag with the
# tag's attributes in %_.
#
# Exactly one of three things happens per tag (hence the if/elsif chain):
#  * a tag whose name attribute is numDocs or buildType is remembered in
#    $currentLocation, so that Text can store its content;
#  * <indexList> prepares the arrays in $data that receive the indexes;
#  * <index> appends its "name->shortname" pair to those arrays.
sub StartTag {
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    # metadataList entries: the value itself is picked up later, in Text
    if (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
    }

    # indexes are stored as indexmap (mg) or as indexfields plus
    # indexfieldmap (mgpp/lucene)
    elsif ($element eq 'indexList') {
        # This assumes that the build type has been read already, which is
        # the order write_cfg_file saves the file in.
        if ($data->{'buildtype'} eq "mg") {
            $indexmap_name = "indexmap";
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }

    # Only genuine <index> elements belong in the map. A substring match on
    # "index" would also pick up <indexLanguage> and <indexSubcollection>,
    # which carry name and shortname attributes as well.
    elsif ($element eq 'index') {
        # $indexmap_name is empty until an <indexList> has been seen
        if (defined $name && defined $shortname && $indexmap_name ne "") {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
175
# Stream-style end-tag handler. Nothing has to happen when an element
# closes; the arguments are unpacked and otherwise ignored.
# NOTE(review): presumably kept so that the Stream style does not fall back
# to its default end-tag behaviour -- confirm against XML::Parser.
sub EndTag {
    my ($parser, $tag) = @_;
}
179
# Stream-style text handler; the text to handle is in $_.
# When StartTag has flagged a numDocs/buildType element, the text is stored
# in $data under the corresponding gs2 key and the flag is cleared so that
# later text is ignored.
sub Text {
    if (defined $currentLocation && $currentLocation =~ /$stringexp/) {
        #print $currentLocation;
        my $gs2_key = $nameMap->{$currentLocation};
        $data->{$gs2_key} = $_;
        undef $currentLocation;
    }
}
191
# Debugging aid: prints whichever of the extracted values are present in
# $data to standard output, one per line.
sub Display {
    # plain values
    foreach my $field (['NumDocs', 'numdocs'], ['BuildType', 'buildtype']) {
        my ($label, $key) = @$field;
        next unless defined $data->{$key};
        print "$label = " . $data->{$key} . "\n";
    }

    # list values, printed space-separated
    foreach my $field (['IndexMap', 'indexmap'],
                       ['IndexFieldMap', 'indexfieldmap'],
                       ['IndexFields', 'indexfields']) {
        my ($label, $key) = @$field;
        next unless defined $data->{$key};
        print "$label = " . join(" ", @{$data->{$key}}) . "\n";
    }
}
202
# Doctype handler registered with the parser in read_cfg_file: aborts the
# parse when a document type declaration names anything but buildConfig.
# Files produced by write_cfg_file carry no such declaration.
sub Doctype {
    my ($expat, $doctype_name, $sysid, $pubid, $internal) = @_;

    if ($doctype_name !~ /^buildConfig$/) {
        die;
    }
}
209
# Character-data handler used in place of the one in XML::Parser::Stream.
# It appends each chunk to the parser's {'Text'} buffer and returns undef
# explicitly; otherwise the (growing) buffer would be the return value,
# which slows things down significantly in some cases.
sub Char {
    if ($] < 5.008) {
        use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
        # NOTE(review): "use bytes" is lexically scoped, so as written it
        # covers only this block and not the append below -- confirm intent.
    }

    # The chunk is read straight from @_ so that it is not copied first.
    my $expat = shift;
    $expat->{'Text'} .= $_[0];
    return undef;
}
220
221
222
# Writes one line to $filehandle: the elements of the array referenced by
# $line, joined without a separator, followed by a newline.
sub write_line {
    my ($filehandle, $line) = @_;
    my $text = join("", @{$line});
    print {$filehandle} $text, "\n";
}
227
# Create the buildConfig.xml file for a specific collection.
#
# Called from make_auxiliary_files() in basebuilder.pm.
#
# Arguments:
#   $buildoutfile - destination file (buildConfig.xml)
#   $buildcfg     - hash ref of build options. Keys read here: buildtype,
#                   numdocs, indexstem, infodbtype, indexmap/indexfieldmap,
#                   stemindexes, maxnumeric, levelmap, textlevel,
#                   searchtype, languagemap, subcollectionmap.
#                   The *map values are lists of "name->shortname" strings.
#   $collectcfg   - hash ref of the collection configuration. Keys read
#                   here: defaultindex, defaultlevel, defaultlanguage,
#                   classify.
#   $disable_OAI  - the OAIPMH serviceRack is written when this is
#                   numerically 0 (which includes undef)
#
# Dies when the output file cannot be opened or cannot be closed cleanly.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_;

    # Three-argument open: the file name is taken literally, whatever
    # characters it contains.
    my $out;
    if (!open($out, '>', $buildoutfile)) {
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die "buildConfigxml::write_cfg_file: open failed: $!\n";
    }

    # Writes its arguments, concatenated, as one line of the output file.
    my $emit = sub { write_line($out, [@_]); };
    # Writes <ELEMENT name="VALUE" /> when VALUE is defined, else nothing.
    my $emit_named = sub {
        my ($element, $value) = @_;
        $emit->("<$element name=\"", $value, "\" />") if defined $value;
    };
    # Splits one "name->shortname" entry; both halves are undef when the
    # entry does not have that form.
    my $split_entry = sub { return $_[0] =~ /^(.*)\-\>(.*)$/; };

    my $buildtype = defined $buildcfg->{"buildtype"} ? $buildcfg->{"buildtype"} : "mgpp";
    my $numdocs = defined $buildcfg->{"numdocs"} ? $buildcfg->{"numdocs"} : "";
    my $indexstem = $buildcfg->{'indexstem'};
    my $infodbtype = $buildcfg->{'infodbtype'};
    # levels and search types only exist for these two build types
    my $uses_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    $emit->("<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">");

    # output building metadata to build config file
    $emit->("<metadataList>");
    $emit->("<metadata name=\"numDocs\">", $numdocs, "</metadata>");
    $emit->("<metadata name=\"buildType\">", $buildtype, "</metadata>");
    $emit->("</metadataList>");

    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # output serviceRackList
    $emit->("<serviceRackList>");

    # This serviceRack enables the collection to provide the oai metadata
    # retrieve service, which is served by the OAIPMH.java class. It is
    # written unless 'disable_OAI' was checked in the GLI (or, equivalently,
    # given on the command line). There are also other configurations in
    # the OAIConfig.xml.
    if ($disable_OAI == 0) {
        $emit->("<serviceRack name=\"OAIPMH\">");
        $emit_named->("indexStem", $indexstem);
        $emit_named->("databaseType", $infodbtype);
        $emit->("</serviceRack>");
    }

    # do the search service
    $emit->("<serviceRack name=\"GS2", $service_type, "Search\">");
    $emit_named->("indexStem", $indexstem);
    $emit_named->("databaseType", $infodbtype);

    # indexes
    my %short_for_index;    # maps index name to shortname
    my @indexlist;          # keeps the order of the indexes
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";

    if (defined $buildcfg->{$maptype}) {
        foreach my $entry (@{$buildcfg->{$maptype}}) {
            my ($index_name, $index_short) = $split_entry->($entry);
            $short_for_index{$index_name} = $index_short;
            push @indexlist, $index_name;
            # the first index is the default unless the user chose another
            $defaultindex = $index_short if (@indexlist == 1);
        }
        # now if the user has assigned a default index, we use it -- but
        # only when it is one of the indexes that were actually built, so
        # that the default never ends up empty
        my $user_index = $collectcfg->{"defaultindex"};
        if (defined $user_index && defined $short_for_index{$user_index}) {
            $defaultindex = $short_for_index{$user_index};
        }
    } else {
        print STDERR "$maptype not defined\n";
    }

    # for each index in indexList, write them out
    $emit->("<indexList>");
    foreach my $index_name (@indexlist) {
        $emit->("<index name=\"", $index_name, "\" ", "shortname=\"", $short_for_index{$index_name}, "\" />");
    }
    $emit->("</indexList>");

    $emit->("<defaultIndex shortname=\"", $defaultindex, "\" />");

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        $emit->("<indexOptionList>");
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $emit->("<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />");

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $emit->("<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />");
        $emit->("</indexOptionList>");
    }

    # levelList
    my %short_for_level;
    my @levellist;
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    if ($uses_levels) {
        if (defined $buildcfg->{'levelmap'}) {
            foreach my $entry (@{$buildcfg->{'levelmap'}}) {
                my ($level_name, $level_short) = $split_entry->($entry);
                $short_for_level{$level_name} = $level_short;
                push @levellist, $level_name;
                if (@levellist == 1) {
                    # let default search level follow the first level in the level list
                    $default_search_level = $level_short;
                    # retrieve/database levels may get modified later if text level is defined
                    $default_retrieve_level = $level_short;
                    $default_db_level = $level_short;
                }
            }
        }
        # The default level assigned by the user overrides the search level
        # only; the retrieve level stays the same. Without a (known) user
        # default the first level chosen above is kept rather than being
        # overwritten with an empty value.
        my $user_level = $collectcfg->{"defaultlevel"};
        if (defined $user_level && defined $short_for_level{$user_level}) {
            $default_search_level = $short_for_level{$user_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if ($buildtype ne "mg") {
        $emit->("<levelList>");
        foreach my $level_name (@levellist) {
            $emit->("<level name=\"", $level_name, "\" shortname=\"", $short_for_level{$level_name}, "\" />");
        }
        $emit->("</levelList>");
    }
    # add in defaultLevel at the same depth as the level list, making the reading job easier
    if ($uses_levels) {
        $emit->("<defaultLevel shortname=\"", $default_search_level, "\" />");
        $emit->("<defaultDBLevel shortname=\"", $default_db_level, "\" />");
    }

    # do searchTypeList
    if ($uses_levels) {
        $emit->("<searchTypeList>");
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $searchtype (@{$buildcfg->{"searchtype"}}) {
                $emit->("<searchType name=\"", $searchtype, "\" />");
            }
        } else {
            $emit->("<searchType name=\"plain\" />");
            $emit->("<searchType name=\"form\" />");
        }
        $emit->("</searchTypeList>");
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        $emit->("<indexLanguageList>");
        my $seen_language = 0;
        foreach my $entry (@{$buildcfg->{"languagemap"}}) {
            my ($lang_name, $lang_short) = $split_entry->($entry);
            $emit->("<indexLanguage name=\"", $lang_name, "\" shortname=\"", $lang_short, "\" />");
            if (!$seen_language) {
                $default_lang = $lang_name;        # name
                $default_lang_short = $lang_short; # shortname
                $seen_language = 1;
            }
        }
        $emit->("</indexLanguageList>");

        # now if the user has assigned a default language (as "en", "ru" etc.)
        # NOTE(review): only the name is replaced; the shortname stays that
        # of the first language -- confirm this is intended.
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
        }
        $emit->("<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />");
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared out here because the retrieve service needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        $emit->("<indexSubcollectionList>");
        my %short_for_subcol;
        my @subcollist;
        foreach my $entry (@{$buildcfg->{'subcollectionmap'}}) {
            my ($subcol_name, $subcol_short) = $split_entry->($entry);
            $short_for_subcol{$subcol_name} = $subcol_short;
            push @subcollist, $subcol_name;
            $default_subcol = $subcol_short if (@subcollist == 1);
        }
        foreach my $subcol_name (@subcollist) {
            $emit->("<indexSubcollection name=\"", $subcol_name, "\" shortname=\"", $short_for_subcol{$subcol_name}, "\" />");
        }
        $emit->("</indexSubcollectionList>");
        $emit->("<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />");
    }

    # close off search service
    $emit->("</serviceRack>");

    # do the retrieve service
    $emit->("<serviceRack name=\"GS2", $service_type, "Retrieve\">");

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # NOTE(review): the shortname attribute receives the language NAME
        # here, as it always has -- confirm against the reader of this file.
        $emit->("<defaultIndexLanguage shortname=\"", $default_lang, "\" />");
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        $emit->("<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />");
    }
    if ($buildtype eq "mg") {
        $emit->("<defaultIndex shortname=\"", $defaultindex, "\" />");
    }

    $emit_named->("indexStem", $indexstem);
    if ($uses_levels) {
        $emit->("<defaultLevel shortname=\"", $default_retrieve_level, "\" />");
    }
    $emit_named->("databaseType", $infodbtype);

    $emit->("</serviceRack>");

    # do the browse service
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    my $classifiers = $collectcfg->{"classify"};
    foreach my $cl (@$classifiers) {
        # every classifier consumes a CL number, phind included
        my $name = "CL$count";
        $count++;
        my $classname = $cl->[0];
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        # the Browse serviceRack is opened lazily, on the first
        # classifier that is not phind
        if (not $started_classifiers) {
            $emit->("<serviceRack name=\"GS2Browse\">");
            $emit_named->("indexStem", $indexstem);
            $emit_named->("databaseType", $infodbtype);
            $emit->("<classifierList>");
            $started_classifiers = 1;
        }

        my $content = ''; # use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            foreach my $j (0 .. $#$cl) {
                my $arg = $cl->[$j];
                if ($arg eq "-buttonname") {
                    # a button name wins outright
                    $content = $cl->[$j+1];
                    last;
                } elsif ($arg eq "-metadata") {
                    # kept unless a -buttonname turns up later
                    $content = $cl->[$j+1];
                }
            }
        }
        $emit->("<classifier name=\"", $name, "\" content=\"", $content, "\" />");
    }
    if ($started_classifiers) {
        # end the classifiers
        $emit->("</classifierList>");
        # close off the Browse service
        $emit->("</serviceRack>");
    }

    # the phind classifier is a separate service
    if ($phind) {
        $emit->("<serviceRack name=\"PhindPhraseBrowse\" />");
    }

    $emit->("</serviceRackList>");
    $emit->("</buildConfig>");

    # buffered write errors only surface when the handle is closed
    close($out)
        or die "buildConfigxml::write_cfg_file: error closing $buildoutfile: $!\n";
}
577
578
579#########################################################
580
5811;
Note: See TracBrowser for help on using the repository browser.