source: main/trunk/greenstone2/perllib/buildConfigxml.pm@ 21785

Last change on this file since 21785 was 21785, checked in by kjdon, 14 years ago

removing disable_OAI option. As far as I know, its only used for gs3, to determine whether to output the serviceRack or not, but this is now done in collectionConfig.xml

  • Property svn:keywords set to Author Date Id Revision
File size: 17.9 KB
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# Reads in (and writes out) buildConfig.xml.
# Note: reading only implements the bits that are currently used, e.g. by
# the incremental build code.
# The resulting data is not a full representation of buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
37# Wrapper that ensures the right version of XML::Parser is loaded given
38# the version of Perl being used. Need to distinguish between Perl 5.6 and
39# Perl 5.8
sub BEGIN {
    # $] holds the numeric version of the running perl. Everything except
    # a 5.6-series interpreter uses the perl-5.8 directory.
    my $cpan_subdir = "perl-5.8";

    if ($] < 5.008) {
        # assume perl 5.6
        $cpan_subdir = "perl-5.6";
    }
    elsif ($] == 5.008) {
        # exactly 5.8.0: still use the 5.8 directory, but complain
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
    }

    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        # Appended rather than prepended, so an XML::Parser that is
        # already reachable through @INC is used by default
        push @INC, $ENV{'GSDLHOME'} . "/perllib/cpan/" . $cpan_subdir;
    }
}
63
64use XML::Parser;
65
66
# Translates the metadata names used in buildConfig.xml (gs3 spelling)
# into the keys used in the gs2-style result hash.
my $nameMap = {
    "numDocs"   => "numdocs",
    "buildType" => "buildtype",
};

# The hash structure filled in by the parser callbacks and returned by
# sub read_cfg_file.
my $data = {};

# Value of the 'name' attribute of the element whose text is still to be
# collected by sub Text; $stringexp lists the names that are of interest.
my $currentLocation = "";
my $stringexp = q/^(buildType|numDocs)$/;

# Key in $data that <index> elements are stored under, and whether the
# plain index names are also recorded in $data->{'indexfields'}.
my $indexmap_name = "";
my $haveindexfields = 0;
82
# Reads a collection's build configuration file, buildConfig.xml, into a
# structure which complies with the one used by gs2 (i.e. one read in by
# &cfgread::read_cfg_file). Only the parts handled by the StartTag and
# Text callbacks are captured: numdocs, buildtype, and indexmap (mg) or
# indexfieldmap plus indexfields (any other build type).
#
#   $filename - path of the file to read; the name must end in
#               "buildConfig.xml"
#
# Returns undef when $filename is not an existing buildConfig.xml file,
# otherwise a hash reference (left empty if the file cannot be opened).
# Parse errors raised by XML::Parser, and the die in sub Doctype, are not
# caught here and propagate to the caller.
sub read_cfg_file {
    my ($filename) = @_;

    # Start every parse from a clean slate so that nothing left over from
    # an earlier (possibly aborted) call leaks into this result.
    $data = {};
    $currentLocation = "";
    $indexmap_name = "";
    $haveindexfields = 0;

    if (!defined $filename || $filename !~ /buildConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # With the Stream style, XML::Parser calls StartTag/EndTag/Text in the
    # package that constructs the parser, so it has to be created here.
    my %parser_args = ('Style' => 'Stream',
                       'Handlers' => {'Char' => \&Char,
                                      'Doctype' => \&Doctype
                       });
    if ($] >= 5.008) {
        # Perl 5.8 and later; Perl 5.6 is run without a protocol encoding
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # Check readability up front so an unreadable file is reported with
    # our own message; the handle itself is not used for parsing.
    my $fh;
    if (!open($fh, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
        return $data;
    }
    close($fh);

    $parser->parsefile($filename);

    #&Display;
    return $data;
}
122
sub StartTag {
    # Stream-style callback, run for every start tag.
    #   $expat   - the parser object (unused)
    #   $element - the element name
    # The element's attributes are read from %_, which the Stream style
    # fills in before calling this sub.
    # The three cases below are mutually exclusive for any one element, so
    # if/elsif is used to avoid unnecessary tests.
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    # metadataList: remember which value (numDocs or buildType) is coming;
    # the text itself is picked up later in sub Text
    if (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
    }

    # indexList: decide where the following <index> elements are stored -
    # indexmap (mg) or indexfieldmap plus indexfields (mgpp/lucene).
    # This assumes that the build type has been read already, which is
    # currently the order we save the file in.
    elsif ($element =~ /^indexList$/) {
        if (defined $data->{'buildtype'} && $data->{'buildtype'} eq "mg") {
            $indexmap_name = "indexmap";
            # clear the flag explicitly in case an earlier list set it
            $haveindexfields = 0;
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }

    # index: store each index in the map. The match is anchored so that
    # <indexLanguage> and <indexSubcollection>, which also carry name and
    # shortname attributes, are not mistaken for indexes. An <index> seen
    # before any <indexList> has nowhere to go and is ignored.
    elsif ($element =~ /^index$/) {
        if ($indexmap_name ne "" && defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
175
sub EndTag {
    # Stream-style callback, run for every end tag.
    #   $expat   - the parser object (unused)
    #   $element - the element name (unused)
    my ($expat, $element) = @_;

    # Any text belonging to the element has already been handed to sub Text
    # by the time its end tag is reported. If the element was empty (e.g.
    # <metadata name="numDocs"></metadata>) no text was delivered and the
    # pending location is still set; drop it so that the whitespace
    # following the element is not stored as the value.
    undef $currentLocation;
}
179
sub Text {
    # Stream-style callback: the accumulated character data is in $_.
    # Nothing to do unless StartTag has flagged a value we are waiting for.
    return unless defined $currentLocation;
    return unless $currentLocation =~ /$stringexp/;

    # metadataList values (numDocs, buildType): store the text under the
    # gs2 spelling of the name, then mark the value as consumed
    my $gs2_key = $nameMap->{$currentLocation};
    $data->{$gs2_key} = $_;
    $currentLocation = undef;
}
191
192# This sub is for debugging purposes
sub Display {
    # Dumps whatever has been read into $data to STDOUT, one
    # "Label = value" line per entry that is defined.

    # single-valued entries
    foreach my $entry (["NumDocs", 'numdocs'], ["BuildType", 'buildtype']) {
        my ($label, $key) = @$entry;
        next unless defined $data->{$key};
        print $label . " = " . $data->{$key} . "\n";
    }

    # list-valued entries, printed space-separated
    foreach my $entry (["IndexMap", 'indexmap'],
                       ["IndexFieldMap", 'indexfieldmap'],
                       ["IndexFields", 'indexfields']) {
        my ($label, $key) = @$entry;
        next unless defined $data->{$key};
        print $label . " = " . join(" ", @{$data->{$key}}) . "\n";
    }
}
202
# Registered as the 'Doctype' handler of the parser built in
# read_cfg_file. Aborts the parse (plain die, no message) unless the
# doctype name of the document is buildConfig.
sub Doctype {
    my ($parser, $doctype_name, $sysid, $pubid, $internal) = @_;

    unless ($doctype_name =~ /^buildConfig$/) {
        die;
    }
}
209
210# This Char function overrides the one in XML::Parser::Stream to overcome a
211# problem where $expat->{Text} is treated as the return value, slowing
212# things down significantly in some cases.
sub Char {
    # $expat - the parser object; the text is accumulated in its 'Text' slot
    # $chars - the chunk of character data just parsed
    my ($expat, $chars) = @_;

    if ($] < 5.008) {
        use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
        # NOTE(review): 'use bytes' is lexically scoped, so placed here it
        # only covers this otherwise empty block and not the append below -
        # confirm whether that is what was intended.
    }

    $expat->{'Text'} .= $chars;

    # explicit undef, so the (potentially large) text is never the result
    return undef;
}
220
221
222
sub write_line {
    # $out       - filehandle to print to
    # $fragments - array reference; the pieces are concatenated without a
    #              separator and written as one line
    my ($out, $fragments) = @_;

    my $text = join("", @{$fragments});
    print {$out} $text, "\n";
}
227
# Create the buildConfig.xml file for a specific collection.
#
# This sub is called in make_auxiliary_files() in basebuilder.pm.
#   $buildoutfile - destination file: buildConfig.xml (overwritten)
#   $buildcfg     - hash ref of build options. Keys read here: buildtype,
#                   numdocs, indexstem, infodbtype, indexmap (mg) or
#                   indexfieldmap (other build types), stemindexes,
#                   maxnumeric, levelmap, textlevel, searchtype,
#                   languagemap, subcollectionmap. The *map values are
#                   array refs of "name->shortname" strings.
#   $collectcfg   - hash ref of the collection configuration. Keys read
#                   here: defaultindex, defaultlevel, defaultlanguage,
#                   classify.
#
# Dies if the file cannot be opened for writing or cannot be closed.
# Entries of a *map that are not of the form "name->shortname" are skipped.
# NOTE(review): values are written into the XML as they are, without any
# escaping - callers are presumably expected to pass XML-safe strings.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg) = @_;

    my $out;
    if (!open($out, '>', $buildoutfile)) {
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die "buildConfigxml::write_cfg_file: cannot open $buildoutfile for writing: $!\n";
    }

    # writes its arguments, concatenated, as one line of the output file
    my $emit = sub { write_line($out, [@_]); };

    # turns a list of "name->shortname" strings into [name, shortname]
    # pairs, keeping the order and skipping anything malformed
    my $split_map = sub {
        my ($entries) = @_;
        my @pairs = ();
        foreach my $entry (@{ $entries || [] }) {
            next unless defined $entry;
            if ($entry =~ /^(.*)\-\>(.*)$/) {
                push @pairs, [$1, $2];
            }
        }
        return @pairs;
    };

    $emit->("<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">");

    # output building metadata to build config file
    my $buildtype = defined $buildcfg->{"buildtype"} ? $buildcfg->{"buildtype"} : "mgpp";
    my $numdocs = defined $buildcfg->{"numdocs"} ? $buildcfg->{"numdocs"} : "";
    my $indexstem = $buildcfg->{'indexstem'};
    my $infodbtype = $buildcfg->{'infodbtype'};
    # levels and search types only exist for these two build types
    my $has_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    $emit->("<metadataList>");
    $emit->("<metadata name=\"numDocs\">", $numdocs, "</metadata>");
    $emit->("<metadata name=\"buildType\">", $buildtype, "</metadata>");
    if (defined $indexstem) {
        $emit->("<metadata name=\"indexStem\">", $indexstem, "</metadata>");
    }
    if (defined $infodbtype) {
        $emit->("<metadata name=\"infodbType\">", $infodbtype, "</metadata>");
    }
    $emit->("</metadataList>");

    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # output serviceRackList
    $emit->("<serviceRackList>");

    # do the search service
    $emit->("<serviceRack name=\"GS2", $service_type, "Search\">");
    if (defined $indexstem) {
        $emit->("<indexStem name=\"", $indexstem, "\" />");
    }
    if (defined $infodbtype) {
        $emit->("<databaseType name=\"", $infodbtype, "\" />");
    }

    # indexes: %indexmap maps index name to shortname, @indexlist keeps
    # the order of the indexes
    my %indexmap = ();
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";

    if (defined $buildcfg->{$maptype}) {
        my @index_pairs = $split_map->($buildcfg->{$maptype});
        foreach my $pair (@index_pairs) {
            $indexmap{$pair->[0]} = $pair->[1];
            push @indexlist, $pair->[0];
        }
        # the first index is the default ...
        $defaultindex = $index_pairs[0]->[1] if @index_pairs;
        # ... unless the user has assigned a default index that was
        # actually built; an unknown name leaves the first index in place
        my $wanted_index = $collectcfg->{"defaultindex"};
        if (defined $wanted_index && defined $indexmap{$wanted_index}) {
            $defaultindex = $indexmap{$wanted_index};
        }
    } else {
        print STDERR "$maptype not defined\n";
    }

    # for each index in indexList, write them out
    $emit->("<indexList>");
    foreach my $index_name (@indexlist) {
        $emit->("<index name=\"", $index_name, "\" ", "shortname=\"", $indexmap{$index_name}, "\" />");
    }
    $emit->("</indexList>");

    $emit->("<defaultIndex shortname=\"", $defaultindex, "\" />");

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        $emit->("<indexOptionList>");
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $emit->("<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />");

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $emit->("<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />");
        $emit->("</indexOptionList>");
    }

    # levelList
    my %levelmap = ();
    my @levellist = ();
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    if ($has_levels) {
        my @level_pairs = $split_map->($buildcfg->{'levelmap'});
        foreach my $pair (@level_pairs) {
            $levelmap{$pair->[0]} = $pair->[1];
            push @levellist, $pair->[0];
        }
        if (@level_pairs) {
            # let the default search level follow the first level in the
            # level list; the retrieve/database levels may get modified
            # below if a text level is defined
            $default_search_level = $level_pairs[0]->[1];
            $default_retrieve_level = $level_pairs[0]->[1];
            $default_db_level = $level_pairs[0]->[1];
        }

        # The default level assigned by the user is no longer ignored
        # [Shaoqun], but the retrieve level stays the same. It is only
        # applied when it is set and names a known level; otherwise the
        # default chosen above is kept rather than being blanked out.
        my $wanted_level = $collectcfg->{"defaultlevel"};
        if (defined $wanted_level && defined $levelmap{$wanted_level}) {
            $default_search_level = $levelmap{$wanted_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if ($buildtype ne "mg") {
        $emit->("<levelList>");
        foreach my $level_name (@levellist) {
            $emit->("<level name=\"", $level_name, "\" shortname=\"", $levelmap{$level_name}, "\" />");
        }
        $emit->("</levelList>");
    }

    if ($has_levels) {
        # add in defaultLevel as the same level as indexLevelList, making
        # the reading job easier
        $emit->("<defaultLevel shortname=\"", $default_search_level, "\" />");
        $emit->("<defaultDBLevel shortname=\"", $default_db_level, "\" />");

        # do searchTypeList
        $emit->("<searchTypeList>");
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $searchtype (@{$buildcfg->{"searchtype"}}) {
                $emit->("<searchType name=\"", $searchtype, "\" />");
            }
        } else {
            $emit->("<searchType name=\"plain\" />");
            $emit->("<searchType name=\"form\" />");
        }
        $emit->("</searchTypeList>");
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        $emit->("<indexLanguageList>");
        my @lang_pairs = $split_map->($buildcfg->{"languagemap"});
        foreach my $pair (@lang_pairs) {
            $emit->("<indexLanguage name=\"", $pair->[0], "\" shortname=\"", $pair->[1], "\" />");
        }
        if (@lang_pairs) {
            # the first language is the default
            $default_lang = $lang_pairs[0]->[0];       # name
            $default_lang_short = $lang_pairs[0]->[1]; # shortname
        }
        $emit->("</indexLanguageList>");

        # now if the user has assigned a default language (as "en", "ru" etc.)
        # NOTE(review): only the name is replaced, the shortname stays that
        # of the first language - confirm this is intended.
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
        }
        $emit->("<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />");
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared out here as the retrieve service needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        $emit->("<indexSubcollectionList>");
        my %subcolmap = ();
        my @subcol_pairs = $split_map->($buildcfg->{'subcollectionmap'});
        foreach my $pair (@subcol_pairs) {
            $subcolmap{$pair->[0]} = $pair->[1];
        }
        # the first subcollection is the default
        $default_subcol = $subcol_pairs[0]->[1] if @subcol_pairs;
        foreach my $pair (@subcol_pairs) {
            $emit->("<indexSubcollection name=\"", $pair->[0], "\" shortname=\"", $subcolmap{$pair->[0]}, "\" />");
        }
        $emit->("</indexSubcollectionList>");
        $emit->("<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />");
    }

    # close off search service
    $emit->("</serviceRack>");

    # do the retrieve service
    $emit->("<serviceRack name=\"GS2", $service_type, "Retrieve\">");

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # NOTE(review): this writes the language *name* into a shortname
        # attribute, as the code always has - confirm against the reader.
        $emit->("<defaultIndexLanguage shortname=\"", $default_lang, "\" />");
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        $emit->("<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />");
    }
    if ($buildtype eq "mg") {
        $emit->("<defaultIndex shortname=\"", $defaultindex, "\" />");
    }
    if (defined $indexstem) {
        $emit->("<indexStem name=\"", $indexstem, "\" />");
    }
    if ($has_levels) {
        $emit->("<defaultLevel shortname=\"", $default_retrieve_level, "\" />");
    }
    if (defined $infodbtype) {
        $emit->("<databaseType name=\"", $infodbtype, "\" />");
    }

    $emit->("</serviceRack>");

    # do the browse service
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    foreach my $cl (@{ $collectcfg->{"classify"} || [] }) {
        # every classifier takes a number, including a phind one
        my $name = "CL$count";
        $count++;
        my $classname = defined $cl->[0] ? $cl->[0] : "";
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        if (not $started_classifiers) {
            # the Browse rack is only opened once a classifier needs it
            $emit->("<serviceRack name=\"GS2Browse\">");
            if (defined $indexstem) {
                $emit->("<indexStem name=\"", $indexstem, "\" />");
            }
            if (defined $infodbtype) {
                $emit->("<databaseType name=\"", $infodbtype, "\" />");
            }
            $emit->("<classifierList>");
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            foreach my $j (0 .. $#{$cl}) {
                my $arg = $cl->[$j];
                next unless defined $arg;
                if ($arg eq "-buttonname") {
                    # a button name wins outright
                    $content = $cl->[$j+1];
                    last;
                } elsif ($arg eq "-metadata") {
                    # keep looking, a later -buttonname overrides this
                    $content = $cl->[$j+1];
                }
            }
            # the option may have been the last argument, with no value
            $content = '' unless defined $content;
        }
        $emit->("<classifier name=\"", $name, "\" content=\"", $content, "\" />");
    }
    if ($started_classifiers) {
        # end the classifiers
        $emit->("</classifierList>");
        # close off the Browse service
        $emit->("</serviceRack>");
    }

    # the phind classifier is a separate service
    if ($phind) {
        $emit->("<serviceRack name=\"PhindPhraseBrowse\" />");
    }

    $emit->("</serviceRackList>");
    $emit->("</buildConfig>");

    # buffered write errors only show up when the handle is closed
    close($out)
        or die "buildConfigxml::write_cfg_file: error closing $buildoutfile: $!\n";
}
569
570
571#########################################################
572
5731;
Note: See TracBrowser for help on using the repository browser.