source: gsdl/trunk/perllib/cfgread4gs3.pm@ 19898

Last change on this file since 19898 was 19898, checked in by kjdon, 15 years ago

Modified slightly so it can read in buildConfig.xml. Added buildType to stringexp, as we need the buildtype metadata from buildConfig when deciding whether the indexer has changed in an incremental build. This is all I need buildConfig for at this time, so I haven't checked that all elements are handled. It will probably need more work if we want access to all elements.

  • Property svn:keywords set to Author Date Id Revision
File size: 27.2 KB
Line 
1###########################################################################
2#
3# cfgread4gs3.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package cfgread4gs3;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
# Make the copy of XML::Parser that ships under $GSDLHOME/perllib/cpan
# available as a fallback.  That copy lives in a per-Perl-version directory,
# so the directory name is chosen from the running interpreter's version.
BEGIN {
    # $] holds the interpreter version as a number.  Anything older than
    # 5.8 is assumed to be Perl 5.6; 5.8.0 and everything newer share the
    # perl-5.8 directory.
    my $cpan_subdir = ($] < 5.008) ? "perl-5.6" : "perl-5.8";

    # Exactly 5.8.0 is still served from perl-5.8, but the user is warned.
    if ($] == 5.008) {
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
    }

    # The directory is appended (push, not unshift) so that an XML::Parser
    # already installed on the system is found first.  Nothing is added when
    # GSDLOS says we are on Windows.
    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        push(@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$cpan_subdir");
    }
}
59
60use XML::Parser;
61
# Greenstone 3 element/attribute names mapped to the key names that
# Greenstone 2 code expects to find in the returned structure.
my $nameMap = {
    "key"                  => "value",
    "creator"              => "creator",
    "maintainer"           => "maintainer",
    "public"               => "public",
    "buildType"            => "buildtype",
    "infodb"               => "infodbtype",
    "searchType"           => "searchtype",
    "languageMetadata"     => "languagemetadata",
    "name"                 => "collectionname",
    "description"          => "collectionextra",
    "icon"                 => "iconcollection",
    "smallicon"            => "iconcollectionsmall",
    "index"                => "indexes",
    "defaultIndex"         => "defaultindex",
    "indexOption"          => "indexoptions",
    "level"                => "levels",
    "defaultLevel"         => "defaultlevel",
    "indexSubcollection"   => "indexsubcollections",
    "indexLanguage"        => "languages",
    "defaultIndexLanguage" => "defaultlanguage",
    "classifier"           => "classify",
    "plugin"               => "plugin",
    "plugout"              => "plugout",
};

# The structure that read_cfg_file fills in and returns.
my $data = {};

# ---- Parser state shared by the StartTag / Text / EndTag handlers ----

# Name of the item whose text content is expected next (the "name"
# attribute of a metadata or displayItem element).
my $currentLocation = "";

# Attributes of the most recently opened element, saved by StartTag so
# that the Text handler can still consult them.
my $currentAttrRef;

# The enclosing element that decides how following elements are handled.
my $currentLevel = "";

# Position of the current plugin/classifier within its enclosing block.
my $currentIndex = 0;

# ---- Patterns (kept as strings, interpolated into matches) ----

# Elements that start a block of repeated children.
my $repeatedBlock = '^(browse|pluginList)$';

# "name" attribute values whose element text is stored as a plain string.
my $stringexp = '^(creator|maintainer|public|buildType)$';

# displayItem names that are stored as collection metadata.
my $displayItemNames = '^(name|description)$';

# Elements collected into an array of strings.
my $arrayexp = '^(index|level|indexSubcollection|indexLanguage)$';

# Elements collected into an array of arrays of strings.
my $arrayarrayexp = '^(plugin|classifier)$';

# Elements collected into a hash; add further element names here.
my $hashexp = '^(subcollection)$';

# (collectionmeta) elements collected into a hash of hashes; add further
# element names here.
my $hashhashexp = '^(displayItem)$';

# Elements whose "name" attribute is stored as a single default value.
my $defaults = '^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$';
113
# Stream-style handler called for every opening tag.
#   $expat   - the parser object (unused here)
#   $element - name of the element being opened
# The element's attributes are read from the global hash %_ (see
# http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream).
# Depending on the element, values are recorded in $data and/or the shared
# parser state ($currentLocation, $currentLevel, $currentIndex) is updated.
#
# The body consists of three separate if/elsif chains that are evaluated
# one after another; within a chain only the first matching branch runs.
sub StartTag {
    my ($expat, $element) = @_;

    # Remember the attributes so the Text handler can consult them when the
    # text content of this element is delivered.
    $currentAttrRef = \%_;

    # Hash slice of %_ : name/value/type are general, filter belongs to
    # subcollections, desid/assigned/lang are used by flax activities.
    my ($name, $value, $type, $filter, $desid, $assigned, $lang)
        = @_{qw(name value type filter desid assigned lang)};

    # ---------------------------- chain 1 ----------------------------
    if ($element =~ /$repeatedBlock/) {
        # A new browse/pluginList block: restart the child counter.
        $currentIndex = 0;
    }
    elsif (defined $name && $name =~ /$stringexp/) {
        # metadataList entries: the text that follows is the value.
        $currentLocation = $name;
    }
    elsif ($element =~ /$defaults/) {
        # Default search index/level/indexLanguage and languageMetadata.
        $data->{$nameMap->{$element}} = $name
            if defined $name && $name =~ /\w/;
    }
    elsif ($element eq "displayItemList") {
        # Remember the parent so Text can tell these displayItems apart.
        $currentLevel = "displayItemList";
    }
    elsif ($element =~ /$hashhashexp/) {
        # displayItem: accepted when there is no "assigned" attribute at
        # all, or when it is exactly "true".
        $currentLocation = $name
            if !defined $assigned || $assigned eq "true";
    }
    elsif ($element eq "infodb") {
        # Database type, e.g. gdbm or gdbm-txtgz.
        $data->{'infodbtype'} = $type;
    }
    elsif ($element eq "search") {
        # Indexer, e.g. mgpp/mg/lucene.
        $data->{'buildtype'} = $type;
    }
    elsif ($element =~ /$arrayexp/) {
        # index|level|indexSubcollection|indexLanguage: append the name to
        # the list kept under the GS2 key (created on first use).
        push @{ $data->{ $nameMap->{$element} } }, $name;
    }
    elsif ($element eq "indexOption") {
        # accentfold/casefold/stem options follow as <option> children.
        $currentLevel = "indexOption";
    }

    # ---------------------------- chain 2 ----------------------------
    if ($currentLevel eq "indexOption" && $element eq "option") {
        push @{ $data->{ $nameMap->{$currentLevel} } }, $name;
    }
    elsif ($element eq "plugout") {
        $currentLevel = "plugout";
        # A plugout without a usable name falls back to the default one.
        my $plugout_name = (defined $name && $name ne "")
            ? $name
            : "GreenstoneXMLPlugout";
        push @{ $data->{ $nameMap->{$currentLevel} } }, $plugout_name;
    }

    # ---------------------------- chain 3 ----------------------------
    if ($currentLevel eq "plugout" && $element eq "option") {
        # Plugout options are appended flat: name first, then value.
        foreach my $part ($name, $value) {
            push @{ $data->{ $nameMap->{$currentLevel} } }, $part
                if defined $part && $part ne "";
        }
    }
    elsif ($element =~ /$hashexp/) {
        # subcollection: name => filter, stored under the element's own name.
        $data->{$element} = {} unless defined $data->{$element};
        if (   defined $name   && $name   =~ /\w/
            && defined $filter && $filter =~ /\w/) {
            $data->{$element}->{$name} = $filter;
        }
    }
    elsif ($element =~ /$arrayarrayexp/) {
        # plugin/classifier: its name (e.g. AZList) becomes the first string
        # of the entry at the current position.
        $currentLevel = $element;
        push @{ $data->{ $nameMap->{$element} }->[$currentIndex] }, $name;
    }
    elsif ($currentLevel =~ /$arrayarrayexp/ && $element eq "option") {
        # Options of the current plugin/classifier follow its name.
        foreach my $part ($name, $value) {
            push @{ $data->{ $nameMap->{$currentLevel} }->[$currentIndex] }, $part
                if defined $part && $part =~ /\w/;
        }
    }
    elsif ($element eq "flaxActivity") {
        $data->{'flaxActivity'} = [] unless defined $data->{'flaxActivity'};
        # Only activities marked assigned="true" are recorded, as a flat
        # list of attribute-name/attribute-value pairs.
        if (defined $assigned && $assigned eq "true") {
            my @pairs = ('name' => $name, 'desid' => $desid, 'lang' => $lang);
            while (@pairs) {
                my ($attr, $attr_value) = splice(@pairs, 0, 2);
                next unless defined $attr_value && $attr_value =~ /\w/;
                push @{ $data->{'flaxActivity'}->[$currentIndex] }, $attr, $attr_value;
            }
        }
    }
}
288
# Stream-style handler called for every closing tag.
#   $expat   - the parser object (unused here)
#   $element - name of the element being closed
# Updates the shared parser state: clears any text location that is still
# pending, resets the block state at the end of a browse / pluginList /
# displayItemList block, and advances the child counter after each
# plugin, classifier or flaxActivity.
sub EndTag {
    my ($expat, $element) = @_;

    # XML::Parser's Stream style hands an element's accumulated text to the
    # Text handler before it calls EndTag.  If a location is still pending
    # here, the element that requested it had no text of its own.  Drop the
    # request so that whitespace or text belonging to a later element is
    # not stored as this element's value.
    undef $currentLocation;

    my $endTags = q/^(browse|pluginList|displayItemList)$/;
    if ($element =~ /$endTags/) {
        # Leaving a block: forget both the position and the block itself.
        $currentIndex = 0;
        $currentLevel = "";
    }
    # $arrayarrayexp contains classifier|plugin
    elsif ($element =~ /$arrayarrayexp/ || $element eq "flaxActivity") {
        # The next sibling gets the next slot.
        $currentIndex = $currentIndex + 1;
    }
}
301
# Stream-style handler called with the text content of an element in $_.
# Stores the text in $data according to the location that StartTag
# recorded in $currentLocation; does nothing when no location is pending.
sub Text {
    return unless defined $currentLocation;

    # metadataList entries (creator, maintainer, public, buildType): the
    # text is the value, stored under the mapped GS2 key.
    if ($currentLocation =~ /$stringexp/) {
        #print $currentLocation;
        $data->{ $nameMap->{$currentLocation} } = $_;
        undef $currentLocation;
        return;
    }

    # displayItem children of displayItemList: the collection's name and
    # possibly its description ('collectionextra' in GS2).
    if ($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
        my $item_name = $currentAttrRef->{'name'};
        my $lang      = $currentAttrRef->{'lang'};

        # Same language keys as Greenstone 2 uses for collectionmeta, since
        # export.pl accesses these values all in the same way.
        $lang = defined $lang ? "[l=$lang]" : 'default';

        # $nameMap turns 'name' into 'collectionname' and 'description'
        # into 'collectionextra'.
        if (defined $item_name && $item_name =~ /$displayItemNames/) {
            $data->{'collectionmeta'}->{ $nameMap->{$item_name} }->{$lang} = $_;
        }
        undef $currentLocation;
        return;
    }

    # searchType: a comma separated pair such as "plain,form", stored as an
    # array under 'searchtype'.
    if ($currentLocation =~ /searchType/) {
        my $key = $nameMap->{$currentLocation};
        my ($plain, $form) = split(",", $_);

        $data->{$key} = [] unless defined $data->{$key};
        foreach my $kind ($plain, $form) {
            push @{ $data->{$key} }, $kind
                if defined $kind && $kind =~ /\w/;
        }
    }
}
353
# Debugging aid: dumps the contents of $data.
# The top-level keys go to STDERR; the values of the known settings go to
# STDOUT, one setting per line, lists joined with commas.
sub Display {
    # metadataList
    foreach my $k (keys %{$data}) {
        print STDERR "*** metadatalist key $k\n";
    }

    # Settings held as plain strings.
    foreach my $key (qw(creator maintainer public defaultindex defaultlevel buildtype)) {
        print $data->{$key}."\n" if (defined $data->{$key});
    }

    # Settings held as arrays of strings.
    foreach my $key (qw(searchtype levels indexsubcollections indexes indexoptions languages)) {
        print join(",", @{$data->{$key}})."\n" if (defined $data->{$key});
    }

    # languagemetadata is stored by StartTag as a single string (it is one
    # of the "defaults" elements), so it must not be dereferenced as a list.
    print $data->{'languagemetadata'}."\n" if (defined $data->{'languagemetadata'});

    # Each plugin is an array: its name followed by its options.
    if (defined $data->{'plugin'}) {
        foreach my $plugin (@{$data->{'plugin'}}) {
            print join(",", @$plugin);
            print "\n";
        }
    }

    # Classifiers have the same shape as plugins.
    if (defined $data->{'classify'}) {
        print "Classifiers: \n";
        foreach my $classifier (@{$data->{'classify'}}) {
            print join(",", @$classifier)."\n";
        }
    }

    # subcollection maps a subcollection name to its filter.
    if (defined $data->{'subcollection'}) {
        foreach my $key (keys %{$data->{'subcollection'}}) {
            print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
        }
    }
}
# Doctype handler: aborts the parse unless the document type declared in
# the file is "DirectoryMetadata" or "GreenstoneDirectoryMetadata".
#   $expat    - the parser object (unused)
#   $name     - the declared document type name
#   $sysid, $pubid, $internal - remaining doctype details (unused)
# Dies with a message naming the offending document type.
# NOTE(review): the accepted names look like they belong to metadata.xml
# files rather than to collectionConfig.xml/buildConfig.xml - confirm that
# rejecting every other DOCTYPE is intended for this module.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files
    # to be processed as well as the "DirectoryMetadata" files which should now
    # be created by import.pl
    if ($name !~ /^(Greenstone)?DirectoryMetadata$/) {
        my $shown = defined $name ? $name : '';
        die "cfgread4gs3::Doctype: unexpected document type '$shown'";
    }
}
400
# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
#   $_[0] - the parser object; its 'Text' entry accumulates character data
#   $_[1] - the chunk of character data to append
# Always returns undef.
sub Char {
    if ($] < 5.008) {
        # "use bytes" is lexically scoped, so it only takes effect when the
        # concatenation sits inside the same block as the pragma.
        # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
        use bytes;
        $_[0]->{'Text'} .= $_[1];
    }
    else {
        $_[0]->{'Text'} .= $_[1];
    }
    return undef;
}
411
# Reads a Greenstone 3 collectionConfig.xml or buildConfig.xml file into a
# structure which complies with the one used by gs2 (i.e. one read in by
# &cfgread::read_cfg_file).
#   $filename - path of the file; its name must end in collectionConfig.xml
#               or buildConfig.xml
# Returns undef when the name does not match or the path is not a plain
# file.  Otherwise returns the hash reference filled in by the parse
# handlers; the hash is empty when the file cannot be opened for reading.
# A file that is not well-formed XML makes the parser die.
sub read_cfg_file {
    my ($filename) = @_;
    $data = {};
    if (($filename !~ /collectionConfig\.xml$/ && $filename !~ /buildConfig\.xml$/) || !-f $filename) {
        return undef;
    }

    # The Stream style calls StartTag/EndTag/Text in this package; Char and
    # Doctype are replaced by the versions defined in this file.
    my %parser_args = ('Style'    => 'Stream',
                       'Handlers' => {'Char'    => \&Char,
                                      'Doctype' => \&Doctype
                                     });
    if ($] >= 5.008) {
        # Perl 5.8 and later: state the protocol encoding explicitly.
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # The parser opens the file itself; this open only checks readability,
    # so that an unreadable file yields a message and an empty result
    # instead of a fatal parser error.
    my $fh;
    if (!open($fh, '<', $filename)) {
        print STDERR "cfgread4gs3::read_cfg_file couldn't read the cfg file $filename\n";
        return $data;
    }
    close($fh);

    $parser->parsefile($filename);

    #&Display;
    return $data;
}
451
452
# Writes one line to a file handle.
#   $filehandle - the handle (or handle name) to print to
#   $line       - reference to an array of string pieces; they are
#                 concatenated without a separator and a newline is added
sub write_line {
    my ($filehandle, $line) = @_;
    my $text = join("", @$line);
    print {$filehandle} $text, "\n";
}
457
# Create the buildConfig.xml file for a specific collection.
# This sub is called in make_auxiliary_files() in basebuilder.pm.
#   $buildoutfile - destination file: buildConfig.xml
#   $buildcfg     - hash ref of build information (buildtype, numdocs,
#                   indexstem, indexmap/indexfieldmap, levelmap, textlevel,
#                   stemindexes, maxnumeric, searchtype, languagemap,
#                   subcollectionmap); the *map entries are "name->shortname"
#   $collectcfg   - contents of collectionConfig.xml as read in by
#                   read_cfg_file in this module
#   $disable_OAI  - when 0 (or undefined) an OAIPMH serviceRack is written
# Dies if the file cannot be opened or closed.  Returns 1 on success.
# Values are written into attributes as they are, without XML escaping.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_;

    my $out;
    if (!open($out, '>', $buildoutfile)) {
        print STDERR "cfgread4gs3::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die "cfgread4gs3::write_cfg_file: cannot open $buildoutfile for writing: $!\n";
    }

    write_line($out, ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);

    # output building metadata to build config file
    my $buildtype = defined $buildcfg->{"buildtype"} ? $buildcfg->{"buildtype"} : "mgpp";
    my $numdocs   = defined $buildcfg->{"numdocs"}   ? $buildcfg->{"numdocs"}   : "";
    write_line($out, ["<metadataList>"]);
    write_line($out, ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
    write_line($out, ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
    write_line($out, ["</metadataList>"]);

    # Anything that is not mg or lucene is served by the MGPP services.
    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }
    my $has_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    # output serviceRackList
    write_line($out, ["<serviceRackList>"]);

    # This serviceRack enables the collection to provide the oai metadata
    # retrieve service.  It is written unless the 'disable_OAI' argument is
    # set (checked in the GLI, or given as a flag on the command line).
    if ($disable_OAI == 0) {
        write_line($out, ["<serviceRack name=\"OAIPMH\">"]);
        _write_index_stem($out, $buildcfg);
        write_line($out, ["</serviceRack>"]);
    }

    # ---------------------- the search service ----------------------
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
    _write_index_stem($out, $buildcfg);

    # indexes: mg builds list them under indexmap, the others under
    # indexfieldmap
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";
    my ($index_pairs, $indexmap) = ([], {});
    my $defaultindex = "";
    if (defined $buildcfg->{$maptype}) {
        ($index_pairs, $indexmap) = _parse_map_entries($buildcfg->{$maptype});
        # the first index is the default ...
        $defaultindex = $index_pairs->[0]->[1] if @$index_pairs;
        # ... unless the user has assigned one that is actually in the map
        my $wanted_index = $collectcfg->{"defaultindex"};
        if (defined $wanted_index && defined $indexmap->{$wanted_index}) {
            $defaultindex = $indexmap->{$wanted_index};
        }
    } else {
        print STDERR "$maptype not defined";
    }
    write_line($out, ["<indexList>"]);
    foreach my $pair (@$index_pairs) {
        my $index_name = $pair->[0];
        write_line($out, ["<index name=\"", $index_name, "\" ", "shortname=\"", $indexmap->{$index_name}, "\" />"]);
    }
    write_line($out, ["</indexList>"]);

    # do default index only for mg
    if ($buildtype eq "mg") {
        write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        write_line($out, ["<indexOptionList>"]);
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        write_line($out, ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        write_line($out, ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);
        write_line($out, ["</indexOptionList>"]);
    }

    # levelList
    my ($level_pairs, $levelmap) = ([], {});
    my $default_search_level   = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level       = "Doc";
    if ($has_levels) {
        if (defined $buildcfg->{'levelmap'}) {
            ($level_pairs, $levelmap) = _parse_map_entries($buildcfg->{'levelmap'});
            if (@$level_pairs) {
                # all defaults follow the first level in the level list;
                # retrieve/database levels may be changed below
                my $first_level = $level_pairs->[0]->[1];
                $default_search_level   = $first_level;
                $default_retrieve_level = $first_level;
                $default_db_level       = $first_level;
            }
        }
        # The default level assigned by the user overrides the search level
        # only; the retrieve level stays the same.  Without an assigned
        # (and known) level the default chosen above is kept.
        my $wanted_level = $collectcfg->{"defaultlevel"};
        if (defined $wanted_level && defined $levelmap->{$wanted_level}) {
            $default_search_level = $levelmap->{$wanted_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level       = $buildcfg->{'textlevel'};
        }
    }
    # for each level in levelList, write them out
    if ($buildtype ne "mg") {
        write_line($out, ["<levelList>"]);
        foreach my $pair (@$level_pairs) {
            my $level_name = $pair->[0];
            write_line($out, ["<level name=\"", $level_name, "\" shortname=\"", $levelmap->{$level_name}, "\" />"]);
        }
        write_line($out, ["</levelList>"]);
    }
    # defaultLevel sits at the same level as the level list, making the
    # reading job easier
    if ($has_levels) {
        write_line($out, ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
        write_line($out, ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);

        # do searchTypeList
        write_line($out, ["<searchTypeList>"]);
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $search_type (@{ $buildcfg->{"searchtype"} }) {
                write_line($out, ["<searchType name=\"", $search_type, "\" />"]);
            }
        } else {
            write_line($out, ["<searchType name=\"plain\" />"]);
            write_line($out, ["<searchType name=\"form\" />"]);
        }
        write_line($out, ["</searchTypeList>"]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang       = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        write_line($out, ["<indexLanguageList>"]);
        my ($lang_pairs, $langmap) = _parse_map_entries($buildcfg->{"languagemap"});
        foreach my $pair (@$lang_pairs) {
            write_line($out, ["<indexLanguage name=\"", $pair->[0], "\" shortname=\"", $pair->[1], "\" />"]);
        }
        write_line($out, ["</indexLanguageList>"]);

        # the first language is the default ...
        if (@$lang_pairs) {
            ($default_lang, $default_lang_short) = @{ $lang_pairs->[0] };
        }
        # ... unless the user has assigned one (as "en", "ru" etc.); its
        # shortname is taken from the map when the map knows it
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
            if (defined $langmap->{$default_lang}) {
                $default_lang_short = $langmap->{$default_lang};
            }
        }
        write_line($out, ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
    }

    # do indexSubcollectionList
    my $default_subcol = "";
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<indexSubcollectionList>"]);
        my ($subcol_pairs, $subcolmap) = _parse_map_entries($buildcfg->{'subcollectionmap'});
        $default_subcol = $subcol_pairs->[0]->[1] if @$subcol_pairs;
        foreach my $pair (@$subcol_pairs) {
            my $subcol_name = $pair->[0];
            write_line($out, ["<indexSubcollection name=\"", $subcol_name, "\" shortname=\"", $subcolmap->{$subcol_name}, "\" />"]);
        }
        write_line($out, ["</indexSubcollectionList>"]);
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }

    # close off search service
    write_line($out, ["</serviceRack>"]);

    # --------------------- the retrieve service ---------------------
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # NOTE(review): this writes the language *name* into the shortname
        # attribute, whereas the search service above writes
        # $default_lang_short - confirm which one the runtime expects.
        write_line($out, ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }
    if ($buildtype eq "mg") {
        write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }
    _write_index_stem($out, $buildcfg);
    if ($has_levels) {
        write_line($out, ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
    }
    write_line($out, ["</serviceRack>"]);

    # ---------------------- the browse service ----------------------
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    foreach my $cl (@{ $collectcfg->{"classify"} || [] }) {
        # every classifier consumes a CLn number, phind included
        my $name = "CL$count";
        $count++;
        my $classname = $cl->[0];
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        # the rack is opened lazily, on the first non-phind classifier
        if (not $started_classifiers) {
            write_line($out, ["<serviceRack name=\"GS2Browse\">"]);
            _write_index_stem($out, $buildcfg);
            write_line($out, ["<classifierList>"]);
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            foreach my $j (0 .. $#{$cl}) {
                my $arg = $cl->[$j];
                if ($arg eq "-buttonname") {
                    # a button name wins outright
                    $content = $cl->[$j+1];
                    last;
                } elsif ($arg eq "-metadata") {
                    # kept unless a button name turns up later
                    $content = $cl->[$j+1];
                }
            }
        }
        $content = '' unless defined $content;
        write_line($out, ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
    }
    if ($started_classifiers) {
        # end the classifiers
        write_line($out, ["</classifierList>"]);
        # close off the Browse service
        write_line($out, ["</serviceRack>"]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        write_line($out, ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
    }

    # flax activities: each entry must hold six non-blank strings, i.e.
    # three attribute name/value pairs: name, desid, and lang.
    foreach my $fa (@{ $collectcfg->{"flaxActivity"} || [] }) {
        next unless defined $fa;
        my @fields = @{$fa}[0 .. 5];
        next if grep { !defined $_ || $_ !~ /\w/ } @fields;
        write_line($out, ["<serviceRack type=\"flaxActivity\" ",
                          $fields[0], "=\"", $fields[1], "\" ",
                          $fields[2], "=\"", $fields[3], "\" ",
                          $fields[4], "=\"", $fields[5], "\" />"]);
    }

    write_line($out, ["</serviceRackList>"]);
    write_line($out, ["</buildConfig>"]);

    # buffered write errors only surface when the handle is closed
    close($out)
        or die "cfgread4gs3::write_cfg_file: error closing $buildoutfile: $!\n";
    return 1;
}

# Writes the <indexStem> element when the build configuration has one.
sub _write_index_stem {
    my ($out, $buildcfg) = @_;
    if (defined $buildcfg->{'indexstem'}) {
        write_line($out, ["<indexStem name=\"", $buildcfg->{'indexstem'}, "\" />"]);
    }
}

# Splits a list of "name->shortname" strings.  Returns two references: the
# [name, shortname] pairs in their original order, and a hash from name to
# shortname.  Entries that do not contain "->" are skipped.
sub _parse_map_entries {
    my ($entries) = @_;
    my @pairs;
    my %short_for;
    foreach my $entry (@{ $entries || [] }) {
        next unless defined $entry;
        my ($long_name, $short_name) = $entry =~ /^(.*)\-\>(.*)$/;
        next unless defined $long_name;
        push @pairs, [$long_name, $short_name];
        $short_for{$long_name} = $short_name;
    }
    return (\@pairs, \%short_for);
}
798
799
800#########################################################
801
8021;
Note: See TracBrowser for help on using the repository browser.