source: main/trunk/greenstone2/perllib/buildConfigxml.pm@ 22431

Last change on this file since 22431 was 21822, checked in by ak19, 14 years ago

Dr Bainbridge has fixed several perl files that depended on perl 5.8 to work and used to fail with Perl 5.10.

  • Property svn:keywords set to Author Date Id Revision
File size: 18.0 KB
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in buildConfig.xml
27# Note, only implemented the bits that are currently used, eg by incremental
28# build code.
29# The resulting data is not a full representation on buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
37# Wrapper that ensures the right version of XML::Parser is loaded given
38# the version of Perl being used. Need to distinguish between Perl 5.6 and
39# Perl 5.8
40sub BEGIN {
41 my $perl_dir;
42
43 # Note: $] encodes the version number of perl
44 if ($]>=5.010) {
45 $perl_dir = "perl-5.10";
46 }
47 elsif ($]>5.008) {
48 # perl 5.8.1 or above
49 $perl_dir = "perl-5.8";
50 }
51 elsif ($]<5.008) {
52 # assume perl 5.6
53 $perl_dir = "perl-5.6";
54 }
55 else {
56 print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
57 print STDERR " Please upgrade to a newer version of Perl.\n";
58 $perl_dir = "perl-5.8";
59 }
60
61 if ($ENV{'GSDLOS'} !~ /^windows$/i) {
62 # Use push to put this on the end, so an existing XML::Parser will be used by default
63 push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
64 }
65}
66
67use XML::Parser;
68
69
70# A mapping hash to resolve name discrepancy between gs2 and gs3.
71my $nameMap = {"numDocs" => "numdocs",
72 "buildType" => "buildtype"
73 };
74
75
76# A hash structure which is returned by sub read_cfg_file.
77my $data = {};
78
79# use those unique attribute values to locate the text within the elements
80my $currentLocation = "";
81my $stringexp = q/^(buildType|numDocs)$/;
82
83my $indexmap_name = "";
84my $haveindexfields = 0;
85
86# Reads in the model collection configuration file, collectionConfig.xml,
87# into a structure which complies with the one used by gs2 (i.e. one read
88# in by &cfgread::read_cfg_file).
89sub read_cfg_file {
90 my ($filename) = @_;
91 $data = {};
92 if ($filename !~ /buildConfig\.xml$/ || !-f $filename) {
93 return undef;
94 }
95
96 # create XML::Parser object for parsing metadata.xml files
97 my $parser;
98 if ($]<5.008) {
99 # Perl 5.6
100 $parser = new XML::Parser('Style' => 'Stream',
101 'Handlers' => {'Char' => \&Char,
102 'Doctype' => \&Doctype
103 });
104 }
105 else {
106 # Perl 5.8
107 $parser = new XML::Parser('Style' => 'Stream',
108 'ProtocolEncoding' => 'ISO-8859-1',
109 'Handlers' => {'Char' => \&Char,
110 'Doctype' => \&Doctype
111 });
112 }
113
114 if (!open (COLCFG, $filename)) {
115 print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
116 } else {
117
118 $parser->parsefile ($filename);# (COLCFG);
119 close (COLCFG);
120 }
121
122 #&Display;
123 return $data;
124}
125
126sub StartTag {
127# Those marked with #@ will not be executed at the same time when this sub is being called
128# so that if/elsif is used to avoid unnecessary tests
129 my ($expat, $element) = @_;
130
131 my $name = $_{'name'};
132 my $shortname = $_{'shortname'};
133
134
135 #@ handling block metadataList
136 if (defined $name and $name =~ /$stringexp/){
137 $currentLocation = $name;
138 # the value will be retrieved later in Text sub
139 }
140
141 #@ handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene)
142 elsif ($element =~ /^indexList$/) {
143 # set up the data arrays
144 # this assumes that the build type has been read already, which is
145 # currently the order we save the file in.
146 if ($data->{'buildtype'} eq "mg") {
147 $indexmap_name = "indexmap";
148 if (!defined $data->{"indexmap"}) {
149 $data->{"indexmap"} = [];
150 }
151 }
152 else {
153 $indexmap_name = "indexfieldmap";
154 $haveindexfields = 1;
155 if (!defined $data->{"indexfieldmap"}) {
156 $data->{"indexfieldmap"} = [];
157 }
158 if (!defined $data->{"indexfields"}) {
159 $data->{"indexfields"} = [];
160 }
161
162 }
163
164 }
165
166 elsif ($element =~ /index/) {
167 # store each index in the map
168 if (defined $name && defined $shortname) {
169 push @{$data->{$indexmap_name}}, "$name->$shortname";
170 if ($haveindexfields) {
171 push @{$data->{'indexfields'}}, $name;
172 }
173 }
174 }
175
176
177}
178
179sub EndTag {
180 my ($expat, $element) = @_;
181}
182
183sub Text {
184 if (defined $currentLocation) {
185 #@ Handling block metadataList(numDocs, buildType)
186 if($currentLocation =~ /$stringexp/){
187 #print $currentLocation;
188 my $key = $nameMap->{$currentLocation};
189 $data->{$key} = $_;
190 undef $currentLocation;
191 }
192 }
193}
194
195# This sub is for debugging purposes
196sub Display {
197
198 print "NumDocs = ".$data->{'numdocs'}."\n" if (defined $data->{'numdocs'});
199 print "BuildType = ".$data->{'buildtype'}."\n" if (defined $data->{'buildtype'});
200 print "IndexMap = ". join(" ",@{$data->{'indexmap'}})."\n" if (defined $data->{'indexmap'});
201 print "IndexFieldMap = ". join(" ",@{$data->{'indexfieldmap'}})."\n" if (defined $data->{'indexfieldmap'});
202 print "IndexFields = ". join(" ",@{$data->{'indexfields'}})."\n" if (defined $data->{'indexfields'});
203
204}
205
206# is this actually used??
207sub Doctype {
208 my ($expat, $name, $sysid, $pubid, $internal) = @_;
209
210 die if ($name !~ /^buildConfig$/);
211}
212
213# This Char function overrides the one in XML::Parser::Stream to overcome a
214# problem where $expat->{Text} is treated as the return value, slowing
215# things down significantly in some cases.
216sub Char {
217 if ($]<5.008) {
218 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
219 }
220 $_[0]->{'Text'} .= $_[1];
221 return undef;
222}
223
224
225
226sub write_line {
227 my ($filehandle, $line) = @_;
228 print $filehandle join ("", @$line), "\n";
229}
230
231# Create the buildConfig.xml file for a specific collection
232sub write_cfg_file {
233 # this sub is called in make_auxiliary_files() in basebuilder.pm
234 # the received args: $buildoutfile - destination file: buildConfig.xml
235 # $buildcfg - all build options,
236 # $collectcfg - contents of collectionConfig.xml read in by read_cfg_file sub in buildConfigxml.pm.
237 my ($buildoutfile, $buildcfg, $collectcfg) = @_;
238 my $line = [];
239
240 if (!open (COLCFG, ">$buildoutfile")) {
241 print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
242 die;
243 }
244
245 &write_line('COLCFG', ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);
246
247 # output building metadata to build config file
248 my $buildtype;
249 if (defined $buildcfg->{"buildtype"}) {
250 $buildtype = $buildcfg->{"buildtype"};
251 } else {
252 $buildtype = "mgpp";
253 }
254 my $numdocs;
255 if (defined $buildcfg->{"numdocs"}) {
256 $numdocs = $buildcfg->{"numdocs"};
257 }
258 &write_line('COLCFG', ["<metadataList>"]);
259 &write_line('COLCFG', ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
260 &write_line('COLCFG', ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
261 if (defined $buildcfg->{'indexstem'}) {
262 &write_line('COLCFG', ["<metadata name=\"indexStem\">", $buildcfg->{"indexstem"}, "</metadata>"]);
263 }
264 if (defined $buildcfg->{'infodbtype'}) {
265 &write_line('COLCFG', ["<metadata name=\"infodbType\">", $buildcfg->{"infodbtype"}, "</metadata>"]);
266 }
267 &write_line('COLCFG', ["</metadataList>"]);
268
269 my $service_type = "MGPP";
270 if ($buildtype eq "mg") {
271 $service_type = "MG";
272 } elsif ($buildtype eq "lucene") {
273 $service_type = "Lucene";
274 }
275
276 # output serviceRackList
277 &write_line('COLCFG', ["<serviceRackList>"]);
278
279 # do the search service
280 &write_line('COLCFG', ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
281 if (defined $buildcfg->{'indexstem'}) {
282 my $indexstem = $buildcfg->{'indexstem'};
283 &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
284 }
285 if (defined $buildcfg->{'infodbtype'}) {
286 my $infodbtype = $buildcfg->{'infodbtype'};
287 &write_line('COLCFG', ["<databaseType name=\"", $infodbtype, "\" />"]);
288 }
289
290 #indexes
291 # maps index name to shortname
292 my $indexmap = {};
293 # keeps the order for indexes
294 my @indexlist = ();
295
296 my $defaultindex = "";
297 my $first = 1;
298 my $maptype = "indexfieldmap";
299 if ($buildtype eq "mg") {
300 $maptype = "indexmap";
301 }
302
303 #map {print $_."\n"} keys %$buildcfg;
304
305 if (defined $buildcfg->{$maptype}) {
306 my $indexmap_t = $buildcfg->{$maptype};
307 foreach my $i (@$indexmap_t) {
308 my ($k, $v) = $i =~ /^(.*)\-\>(.*)$/;
309 $indexmap->{$k} = $v;
310 push @indexlist, $k;
311 if ($first) {
312 $defaultindex = $v;
313 $first = 0;
314 }
315 }
316 # now if the user has assigned a default index, we use it
317 if (defined $collectcfg->{"defaultindex"}) {
318 $defaultindex = $indexmap->{$collectcfg->{"defaultindex"}};
319 }
320
321 } else {
322 print STDERR "$maptype not defined";
323 }
324 #for each index in indexList, write them out
325 &write_line('COLCFG', ["<indexList>"]);
326 foreach my $i (@indexlist) {
327 my $index = $indexmap->{$i};
328 &write_line('COLCFG', ["<index name=\"", $i, "\" ", "shortname=\"", $index, "\" />"]);
329 }
330 &write_line('COLCFG', ["</indexList>"]);
331
332
333 &write_line('COLCFG', ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
334
335
336 # do indexOptionList
337 if ($buildtype eq "mg" || $buildtype eq "mgpp") {
338 &write_line('COLCFG', ["<indexOptionList>"]);
339 my $stemindexes = 3; # default is stem and casefold
340 if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
341 $stemindexes = $buildcfg->{'stemindexes'};
342 }
343 &write_line('COLCFG', ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);
344
345 my $maxnumeric = 4; # default
346 if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
347 $maxnumeric = $buildcfg->{'maxnumeric'};
348 }
349 &write_line('COLCFG', ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);
350 &write_line('COLCFG', ["</indexOptionList>"]);
351 }
352
353 # levelList
354 my $levelmap = {};
355 my @levellist = ();
356 my $default_search_level = "Doc";
357 my $default_retrieve_level = "Doc";
358 my $default_db_level = "Doc";
359 $first = 1;
360 if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
361 if (defined $buildcfg->{'levelmap'}) {
362 my $levelmap_t = $buildcfg->{'levelmap'};
363 foreach my $l (@$levelmap_t) {
364 my ($key, $val) = $l =~ /^(.*)\-\>(.*)$/;
365 $levelmap->{$key} = $val;
366 push @levellist, $key;
367 if ($first) {
368 # let default search level follow the first level in the level list
369 $default_search_level = $val;
370 # retrieve/database levels may get modified later if text level is defined
371 $default_retrieve_level = $val;
372 $default_db_level = $val;
373 $first = 0;
374 }
375 }
376 }
377 # the default level assigned by the user is no longer ignored [Shaoqun], but the retrievel level stays the same.
378 #if (defined $collectcfg->{"defaultlevel"}) {
379 $default_search_level = $levelmap->{$collectcfg->{"defaultlevel"}};
380 # $default_retrieve_level = $default_search_level;
381 #}
382
383 if (defined $buildcfg->{'textlevel'}) {
384 # let the retrieve/database levels always follow the textlevel
385 $default_retrieve_level = $buildcfg->{'textlevel'};
386 $default_db_level = $buildcfg->{'textlevel'};
387
388 }
389 }
390 #for each level in levelList, write them out
391 if ($buildtype ne "mg") {
392 &write_line('COLCFG', ["<levelList>"]);
393 foreach my $lv (@levellist) {
394 my $level = $levelmap->{$lv};
395 &write_line('COLCFG', ["<level name=\"", $lv, "\" shortname=\"", $level, "\" />"]);
396 }
397 &write_line('COLCFG', ["</levelList>"]);
398 }
399 # add in defaultLevel as the same level as indexLevelList, making the reading job easier
400 if ($buildtype eq "lucene" || $buildtype eq "mgpp") {
401 &write_line('COLCFG', ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
402 }
403 if ($buildtype eq "lucene" || $buildtype eq "mgpp") {
404 &write_line('COLCFG', ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);
405 }
406 # do searchTypeList
407 if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
408 &write_line('COLCFG', ["<searchTypeList>"]);
409
410 if (defined $buildcfg->{"searchtype"}) {
411 my $searchtype_t = $buildcfg->{"searchtype"};
412 foreach my $s (@$searchtype_t) {
413 &write_line('COLCFG', ["<searchType name=\"", $s, "\" />"]);
414 }
415 } else {
416 &write_line('COLCFG', ["<searchType name=\"plain\" />"]);
417 &write_line('COLCFG', ["<searchType name=\"form\" />"]);
418 }
419 &write_line('COLCFG', ["</searchTypeList>"]);
420 }
421
422 # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
423 $first = 1;
424 my $default_lang = "";
425 my $default_lang_short = "";
426 if (defined $buildcfg->{"languagemap"}) {
427 &write_line('COLCFG', ["<indexLanguageList>"]);
428
429 my $langmap_t = $buildcfg->{"languagemap"};
430 foreach my $l (@$langmap_t) {
431 my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
432
433 &write_line('COLCFG', ["<indexLanguage name=\"", $k, "\" shortname=\"", $v, "\" />"]);
434 if ($first) {
435 $default_lang = $k; #name
436 $default_lang_short = $v; #shortname
437 $first = 0;
438 }
439 }
440
441 &write_line('COLCFG', ["</indexLanguageList>"]);
442 # now if the user has assigned a default language (as "en", "ru" etc.)
443 if (defined $collectcfg->{"defaultlanguage"}) {
444 $default_lang = $collectcfg->{"defaultlanguage"};
445 }
446 &write_line('COLCFG', ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
447 }
448
449
450 # do indexSubcollectionList
451 my $default_subcol = "";# make it in sub scope to be used in the concatenation
452 if (defined $buildcfg->{'subcollectionmap'}) {
453 &write_line('COLCFG', ["<indexSubcollectionList>"]);
454 my $subcolmap = {};
455 my @subcollist = ();
456 $first = 1;
457 my $subcolmap_t = $buildcfg->{'subcollectionmap'};
458 foreach my $l (@$subcolmap_t) {
459 my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
460 $subcolmap->{$k} = $v;
461 push @subcollist, $k;
462 if ($first) {
463 $default_subcol = $v;
464 $first = 0;
465 }
466 }
467 foreach my $sl (@subcollist) {
468 my $subcol = $subcolmap->{$sl};
469 &write_line('COLCFG', ["<indexSubcollection name=\"", $sl, "\" shortname=\"", $subcol, "\" />"]);
470 }
471
472 &write_line('COLCFG', ["</indexSubcollectionList>"]);
473 &write_line('COLCFG', ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
474 }
475
476 # close off search service
477 &write_line('COLCFG', ["</serviceRack>"]);
478
479 # do the retrieve service
480 &write_line('COLCFG', ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);
481
482 # do default index
483 if (defined $buildcfg->{"languagemap"}) {
484 &write_line('COLCFG', ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
485 }
486 if (defined $buildcfg->{'subcollectionmap'}) {
487 &write_line('COLCFG', ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
488 }
489 if ($buildtype eq "mg") {
490 &write_line('COLCFG', ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
491 }
492
493 if (defined $buildcfg->{'indexstem'}) {
494 my $indexstem = $buildcfg->{'indexstem'};
495 &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
496 }
497 if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
498 &write_line('COLCFG', ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
499 }
500 if (defined $buildcfg->{'infodbtype'}) {
501 my $infodbtype = $buildcfg->{'infodbtype'};
502 &write_line('COLCFG', ["<databaseType name=\"", $infodbtype, "\" />"]);
503 }
504
505 &write_line('COLCFG', ["</serviceRack>"]);
506
507 # do the browse service
508 my $count = 1;
509 my $phind = 0;
510 my $started_classifiers = 0;
511
512 my $classifiers = $collectcfg->{"classify"};
513 foreach my $cl (@$classifiers) {
514 my $name = "CL$count";
515 $count++;
516 my ($classname) = @$cl[0];
517 if ($classname =~ /^phind$/i) {
518 $phind=1;
519 #should add it into coll config classifiers
520 next;
521 }
522
523 if (not $started_classifiers) {
524 &write_line('COLCFG', ["<serviceRack name=\"GS2Browse\">"]);
525 if (defined $buildcfg->{'indexstem'}) {
526 my $indexstem = $buildcfg->{'indexstem'};
527 &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
528 }
529 if (defined $buildcfg->{'infodbtype'}) {
530 my $infodbtype = $buildcfg->{'infodbtype'};
531 &write_line('COLCFG', ["<databaseType name=\"", $infodbtype, "\" />"]);
532 }
533 &write_line('COLCFG', ["<classifierList>"]);
534 $started_classifiers = 1;
535 }
536 my $content = ''; #use buttonname first, then metadata
537 if ($classname eq "DateList") {
538 $content = "Date";
539 } else {
540 for (my $j=0; $j<scalar(@$cl); $j++) {
541 my $arg = @$cl[$j];
542 if ($arg eq "-buttonname"){
543 $content = @$cl[$j+1];
544 last;
545 } elsif ($arg eq "-metadata") {
546 $content = @$cl[$j+1];
547 }
548
549 }
550 }
551 &write_line('COLCFG', ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
552 }
553 if ($started_classifiers) {
554 # end the classifiers
555 &write_line('COLCFG', ["</classifierList>"]);
556 # close off the Browse service
557 &write_line('COLCFG', ["</serviceRack>"]);
558 }
559
560 # the phind classifier is a separate service
561 if ($phind) {
562 # if phind classifier
563 &write_line('COLCFG', ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
564 }
565
566
567 &write_line('COLCFG', ["</serviceRackList>"]);
568 &write_line('COLCFG', ["</buildConfig>"]);
569
570 close (COLCFG);
571 }
572
573
574#########################################################
575
5761;
Note: See TracBrowser for help on using the repository browser.