source: main/trunk/greenstone2/perllib/collConfigxml.pm@ 38935

Last change on this file since 38935 was 36839, checked in by kjdon, 20 months ago

added serviceRack attribute to <search name='xxx' orthogonal='true'> - if specified, then you can write the serviceRack directly into collectionConfig serviceRackList, instead of it auto generating one (named GS2+<search_name>+Search) into buildConfig

  • Property svn:keywords set to Author Date Id Revision
File size: 17.7 KB
RevLine 
[15600]1###########################################################################
2#
[20096]3# collConfigxml.pm --
[15600]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[14741]25
[15600]26# reads in configuration files of xml form
27
[20096]28package collConfigxml;
[15600]29use strict;
30no strict 'refs';
31no strict 'subs';
32
[23895]33use XMLParser;
[15600]34
# ---------------------------------------------------------------------------
# Name translation
# ---------------------------------------------------------------------------
# Resolves the naming differences between gs3 and gs2: each key is a name as
# it appears in collectionConfig.xml (gs3), each value is the internal option
# name used as a key of the hash returned by read_cfg_file.
my $nameMap = {
    "key"                  => "value",
    "creator"              => "creator",
    "maintainer"           => "maintainer",
    "public"               => "public",
    "infodb"               => "infodbtype",
    "defaultIndex"         => "defaultindex",
    "defaultLevel"         => "defaultlevel",
    "name"                 => "collectionname",
    "description"          => "collectionextra",
    "smallicon"            => "iconcollectionsmall",
    "icon"                 => "iconcollection",
    "level"                => "levels",
    "classifier"           => "classify",
    "indexSubcollection"   => "indexsubcollections",
    "indexLanguage"        => "languages",
    "defaultIndexLanguage" => "defaultlanguage",
    "index"                => "indexes",
    "indexfieldoptions"    => "indexfieldoptions",
    "sort"                 => "sortfields",
    "defaultSort"          => "defaultsort",
    "facet"                => "facetfields",
    "plugin"               => "plugin",
    "plugout"              => "plugout",
    "indexOption"          => "indexoptions",
    "searchType"           => "searchtype",
    "languageMetadata"     => "languagemetadata",
    "buildType"            => "buildtype",
    "orthogonalBuildTypes" => "orthogonalbuildtypes",
};

# ---------------------------------------------------------------------------
# Parser state shared by the handler subs (StartTag, EndTag, Text)
# ---------------------------------------------------------------------------

# The hash structure that read_cfg_file fills in and returns.
my $data = {};

# Set by StartTag to the 'name' attribute of the element whose text content
# Text should pick up next (creator, public, maintainer, a displayItem, ...).
my $currentLocation = "";

# Reference to the attribute hash seen by the most recent StartTag call, kept
# so that Text can still look at the attributes.
my $currentAttrRef;

# Name of the enclosing construct currently being processed
# (e.g. "displayItemList", "index", "plugout", "plugin", "classifier").
my $currentLevel = "";

# Position of the current plugin/classifier inside its list: reset to 0 when
# a repeated block starts, advanced by EndTag after each plugin/classifier.
my $currentIndex = 0;

# ---------------------------------------------------------------------------
# Element / attribute name patterns (strings interpolated into regexes)
# ---------------------------------------------------------------------------

# Elements that start a repeated block.
my $repeatedBlock = '^(browse|pluginList)$';

# 'name' attribute values whose element text is stored as a plain string.
my $stringexp = '^(creator|maintainer|public|buildType)$';

# displayItem names that are recorded as collection metadata.
my $displayItemNames = '^(name|description)$';

# These options get set at top level of the returned hash.
my $topleveloptionexp = '^(importOption|buildOption)$';

# Elements stored as an array of names that may also carry child
# <option name=".." value=".."/> elements, collected per index name, e.g.
#   <index name="allfields">
#     <displayItem ... />
#     <option name="solrfieldtype" value="text_ja" />
#   </index>
my $structexp = '^(index)$';

# Elements stored as a flat array of their 'name' attributes.
# ("index" is not listed here; it is handled through $structexp.)
my $arrayexp = '^(sort|facet|level|indexOption|indexSubcollection|indexLanguage|orthogonalBuildTypes)$';

# Elements stored as an array of arrays of strings (name followed by options).
my $arrayarrayexp = '^(plugin|classifier)$';

# Elements represented by a hash; add other element names here as needed.
my $hashexp = '^(subcollection)$';

# (Collectionmeta) elements represented by a hash of hashes; add others here.
my $hashhashexp = '^(displayItem)$';

# Elements whose 'name' attribute is stored directly as a default setting.
my $defaults = '^(defaultIndex|defaultLevel|defaultSort|defaultIndexLanguage|languageMetadata)$';
[15600]105
# Reads in the model collection configuration file, collectionConfig.xml,
# into a structure which complies with the one used by gs2 (i.e. one read
# in by &cfgread::read_cfg_file).
#
# Parameters:
#   $filename - path to the file; it must end in "collectionConfig.xml" and
#               be an existing plain file.
# Returns:
#   undef if $filename does not look like a collectionConfig.xml file or is
#   not a plain file; otherwise a reference to the hash built up by the
#   handler subs (left empty if the file could not be opened for reading).
# Any exception raised by the parser (e.g. from the Doctype handler) is not
# caught here.
sub read_cfg_file {
    my ($filename) = @_;

    # All handler state lives in file-level variables, so start every parse
    # from a clean slate; otherwise context left over from a previously
    # parsed file (e.g. a stale $currentLevel) could affect this one.
    $data            = {};
    $currentLocation = "";
    $currentLevel    = "";
    $currentIndex    = 0;
    $currentAttrRef  = undef;

    if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # Removed ProtocolEncoding (see MetadataXMLPlugin for details)

    # Create the XML::Parser object for parsing collectionConfig.xml. With the
    # Stream style the parser calls StartTag/EndTag/Text in package 'Pkg'.
    my $parser = XML::Parser->new('Style'    => 'Stream',
                                  'Pkg'      => 'collConfigxml',
                                  'Handlers' => {'Char'    => \&Char,
                                                 'Doctype' => \&Doctype});

    # The handle is only used to check that the file is readable; the parser
    # opens the file itself by name.
    my $fh;
    if (!open($fh, '<', $filename)) {
        print STDERR "collConfigxml::read_cfg_file couldn't read the cfg file $filename: $!\n";
    }
    else {
        close($fh);
        $parser->parsefile($filename);
    }

    #&Display;
    return $data;
}
134
# Start-tag handler, called by the Stream-style parser for every opening tag.
# See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
#
# Parameters:
#   $expat   - the parser object (unused here)
#   $element - name of the element being opened
# The element's attributes are read from %_.
#
# Depending on the element (and on $currentLevel, i.e. the construct we are
# currently inside), the attribute values are stored into $data and/or the
# parser state ($currentLocation, $currentLevel, $currentIndex) is updated
# for the later Text and EndTag calls.
#
# Branches marked with #@ are not expected to apply to the same element at
# the same time, which is why they are chained with if/elsif to avoid
# unnecessary tests.
sub StartTag {
    my ($expat, $element) = @_;

    # %_ is a hash of all the attributes of this element; keep a reference so
    # the attributes can still be used when the text content of the element
    # is handled in the Text subroutine.
    $currentAttrRef = \%_;

    my $name       = $_{'name'};
    my $value      = $_{'value'};
    my $type       = $_{'type'};
    my $orthogonal = $_{'orthogonal'};

    # for subcollections
    my $filter = $_{'filter'};

    # was this just a flax thing??
    my $assigned = $_{'assigned'};

    #@ Marking repeated block
    if ($element =~ /$repeatedBlock/) {
        $currentIndex = 0;
    }

    #@ handling block metadataList
    elsif (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
    }

    #@ handling default search index/level/indexLanguage and languageMetadata
    elsif ($element =~ /$defaults/) {
        if (defined $name and $name =~ /\w/) {
            $data->{$nameMap->{$element}} = $name;
        }
    }

    #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
    elsif ($element eq "displayItemList") {
        $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
    }
    elsif ($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
        # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
        if (!defined $assigned || $assigned eq "true") {
            $currentLocation = $name;
        }
    }

    #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
    elsif ($element eq "infodb") {
        $data->{'infodbtype'} = $type;
    }

    #@ Handling indexer: mgpp/mg/lucene; stringexp
    #@ Handling orthogonal indexers: audioDB; arrayexp
    elsif ($element eq "search") {
        if ((defined $orthogonal) && ($orthogonal =~ m/^(true|on|1)$/i)) {
            push(@{$data->{'orthogonalbuildtypes'}}, $type);

            # An explicit serviceRack attribute is recorded per orthogonal
            # build type.
            my $service_rack = $_{'serviceRack'};
            if ((defined $service_rack) && $service_rack =~ /\w/) {
                $data->{'orthogonalcustomservicerack'}->{$type} = $service_rack;
            }
        }
        else {
            $data->{'buildtype'} = $type;
        }
    }

    elsif ($element eq "store_metadata_coverage") {
        $data->{'store_metadata_coverage'} = $value;
    }

    #@ Handling searchtype: plain,form; arrayexp
    #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
    #@ Handling searchtype: plain, form
    #$currentLocation = $name;
    #}

    #@ Handle sort|facet|level|indexOption|indexSubcollection|indexLanguage
    elsif ($element =~ /$arrayexp/) {
        my $key = $nameMap->{$element};
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }

        if (defined $name) {
            push(@{$data->{$key}}, $name);
        }
    }

    #@ Handle index which can have options as children to be put in a map: <option name="name" value="value" />
    elsif ($element =~ /$structexp/) {
        # remember that we are inside an index, for any child <option>
        $currentLevel = $element;

        # for GS2, 'indexes' should be an arrayexp, so maintain that part of the code as it is
        my $key = $nameMap->{$element}; # 'indexes'
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }

        if (defined $name) {
            push(@{$data->{$key}}, $name);
        }
    }

    #@ Handling the option elements in each index structure, if any, only for GS2
    elsif ($currentLevel =~ /$structexp/ && $element eq "option") {
        my $key = $nameMap->{$currentLevel."fieldoptions"}; # 'indexfieldoptions'

        # The last element of the 'indexes' array contains the name of the index currently being processed
        # e.g. "allfields"
        my $indexKey  = $nameMap->{$currentLevel};            # 'indexes'
        my $arrSize   = scalar(@{$data->{$indexKey}});        # length of 'indexes' array
        my $indexName = @{$data->{$indexKey}}[$arrSize-1];    # name of index pushed by the previous branch

        if (!defined $data->{$key}) {
            $data->{$key} = {}; # new hashmap of per-index options
        }
        if (defined $name and $name =~ /\w/ and defined $value and $value =~ /\w/) {
            # we have a name and value to this option, add them as options associated with the current index
            if (!defined $data->{$key}->{$indexName}) {
                $data->{$key}->{$indexName} = {}; # e.g. indexfieldoptions -> allfields is a new hashmap
            }

            $data->{$key}->{$indexName}->{$name} = $value;

            #print STDERR "@@@ Found: Value: data->{'indexfieldoptions'}->{$indexName}->{$name}: " . $data->{'indexfieldoptions'}->{$indexName}->{$name} . "\n";
        }
    }

    # importOption and buildOption, just stored at top level, name=value,
    # as per gs2 version
    elsif ($element =~ /$topleveloptionexp/) {
        if (defined $name) {
            $name =~ s/^-//; # remove initial - if there was one
            if (!defined $value) {
                # flag option, set to true
                $value = "true";
            }
            $data->{$name} = $value;
        }
    }

    #@ plugout options
    elsif ($element eq "plugout") {
        $currentLevel = "plugout";
        my $key = $nameMap->{$currentLevel};
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        if (defined $name and $name ne "") {
            push(@{$data->{$key}}, $name);
        }
        else {
            # no plugout name given: fall back to GreenstoneXMLPlugout
            push(@{$data->{$key}}, "GreenstoneXMLPlugout");
        }
    }

    # Note: this "if" starts a second, separate chain, so the tests below are
    # evaluated for every element, even when a branch above already ran.
    # NOTE(review): possibly meant to be an elsif; left as in the original.
    if ($currentLevel eq "plugout" and $element eq "option") {
        # plugout options are appended (name, then value) to the flat plugout array
        my $key = $nameMap->{$currentLevel};
        if (defined $name and $name ne "") {
            push(@{$data->{$key}}, $name);
        }
        if (defined $value and $value ne "") {
            push(@{$data->{$key}}, $value);
        }
    }

    #@ use hash of hash of strings: hashexp
    elsif ($element =~ /$hashexp/) {
        if (!defined $data->{$element}) {
            $data->{$element} = {};
        }
        if (defined $name and $name =~ /\w/) {
            if (defined $filter and $filter =~ /\w/) {
                $data->{$element}->{$name} = $filter;
            }
        }
    }

    #@ Handling each classifier/plugin element
    elsif ($element =~ /$arrayarrayexp/) {
        # remember that we are inside a plugin/classifier, for any child <option>
        $currentLevel = $element;
        my $key = $nameMap->{$element};

        # an array of arrays of strings, one inner array per plugin/classifier
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }

        # Push classifier/plugin name (e.g. AZList) into $data as the first string
        push(@{$data->{$key}->[$currentIndex]}, $name);
        if (defined $value and $value =~ /\w/) {
            push(@{$data->{$key}->[$currentIndex]}, $value);
            # (a leftover debugging print of $value to STDOUT used to be here)
        }
        #print $currentIndex."indexup\n";
    }

    #@ Handling the option elements in each classifier/plugin element (as the following strings)
    elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
        # find the gs2 mapping name for classifier and plugin
        my $key = $nameMap->{$currentLevel};

        if (defined $name and $name =~ /\w/) {
            push(@{$data->{$key}->[$currentIndex]}, $name);
        }
        # unlike the name, any value that is not purely whitespace is kept
        if (defined $value and $value !~ /^\s*$/) {
            push(@{$data->{$key}->[$currentIndex]}, $value);
        }
    }
}
356
# End-tag handler, called by the Stream-style parser for every closing tag.
#   $parser - the parser object (unused)
#   $tag    - name of the element being closed
# Closing one of the container elements clears the block context; closing a
# plugin or classifier moves on to the next slot of its list.
sub EndTag {
    my ($parser, $tag) = @_;

    if ($tag =~ /^(browse|pluginList|displayItemList|indexOption)$/) { #|buildOptionList)$/;
        $currentIndex = 0;
        $currentLevel = "";
    }
    # $arrayarrayexp contains classifier|plugin
    elsif ($tag =~ /$arrayarrayexp/) {
        $currentIndex += 1;
    }
}
370
# Text handler, called by the Stream-style parser with the text in $_.
# What is done with the text depends on $currentLocation, which StartTag set
# from the 'name' attribute of an element; nothing happens while it is undef.
sub Text {
    return if !defined $currentLocation;

    #@ Handling block metadataList (creator, maintainer, public)
    if ($currentLocation =~ /$stringexp/) {
        #print $currentLocation;
        $data->{ $nameMap->{$currentLocation} } = $_;
        undef $currentLocation;
    }

    #@ Handling displayItem metadata that are children of displayItemList:
    # this is where the collection's name and possibly its description
    # ('collectionextra' in GS2) are picked up.
    elsif ($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
        my $lang     = $currentAttrRef->{'lang'};
        my $itemName = $currentAttrRef->{'name'};

        # This is how data->collectionmeta's language is set in Greenstone 2.
        # Need to be consistent, since export.pl accesses these values all in the same way.
        my $langKey = defined $lang ? "[l=$lang]" : 'default';

        # attribute name = 'name' || 'description', which $nameMap resolves
        # to 'collectionname' and 'collectionextra' respectively
        if (defined $itemName and $itemName =~ /$displayItemNames/) {
            $data->{'collectionmeta'}->{ $nameMap->{$itemName} }->{$langKey} = $_; # the value is the Text parsed
            #print STDERR "***Found: $nameMap->{$itemName} collectionmeta, lang is $langKey. Value: $data->{'collectionmeta'}->{$nameMap->{$itemName}}->{$langKey}\n";
        }
        undef $currentLocation;
    }

    #@ Handling searchtype: plain,form; arrayexp
    elsif ($currentLocation =~ /searchType/) {
        # map 'searchType' into 'searchtype'
        my $key = $nameMap->{$currentLocation};

        # split it by ','; only the first two pieces are considered
        my ($plain, $form) = split (",", $_);

        $data->{$key} = [] if !defined $data->{$key};
        foreach my $piece ($plain, $form) {
            push @{ $data->{$key} }, $piece if defined $piece and $piece =~ /\w/;
        }
    }
}
[15619]422
# This sub is for debugging purposes: it dumps the contents of $data (the
# structure built by the last read_cfg_file call) in readable form.
# Takes no arguments. All output goes to STDERR.
sub Display {
    # metadataList
    foreach my $k (keys %{$data}) {
        print STDERR "*** metadatalist key $k\n";
    }

    # Settings held as plain strings: [label, key in $data]
    my @scalar_fields = (
        ['creator',       'creator'],
        ['maintainer',    'maintainer'],
        ['public',        'public'],
        ['default index', 'defaultindex'],
        ['default level', 'defaultlevel'],
        ['build type',    'buildtype'],
    );
    foreach my $field (@scalar_fields) {
        my ($label, $key) = @{$field};
        print STDERR "*** $label: ".$data->{$key}."\n" if (defined $data->{$key});
    }

    print STDERR "*** orthogonal build types: ".join(",",@{$data->{"orthogonalbuildtypes"}})."\n" if (defined $data->{"orthogonalbuildtypes"});
    if (defined $data->{"orthogonalcustomservicerack"}) {
        print STDERR "*** orthogonal custom service racks: \n";
        foreach my $s (keys %{$data->{"orthogonalcustomservicerack"}}) {
            print STDERR "$s -> ". $data->{"orthogonalcustomservicerack"}->{$s}."\n";
        }
    }

    # Settings held as arrays of strings: the heading is always printed, the
    # comma-separated values only when the setting is present.
    my @list_fields = (
        ['search types',         'searchtype'],
        ['levels',               'levels'],
        ['index subcollections', 'indexsubcollections'],
        ['indexes',              'indexes'],
        ['index options',        'indexoptions'],
        ['languages',            'languages'],
    );
    foreach my $field (@list_fields) {
        my ($label, $key) = @{$field};
        print STDERR "*** $label: \n";
        print STDERR join(",",@{$data->{$key}})."\n" if (defined $data->{$key});
    }

    # languagemetadata is stored by StartTag as a plain string (it is one of
    # the $defaults elements), so print it directly instead of dereferencing
    # it as an array.
    print STDERR "*** language metadata: \n";
    print STDERR $data->{'languagemetadata'}."\n" if (defined $data->{'languagemetadata'});

    # One line per plugin: its name followed by its options
    print STDERR "*** Plugins: \n";
    if (defined $data->{'plugin'}) {
        foreach my $plugin (@{$data->{'plugin'}}) {
            print STDERR join(",",@{$plugin})."\n";
        }
    }

    # One line per classifier: its name followed by its options
    if (defined $data->{'classify'}) {
        print STDERR "*** Classifiers: \n";
        foreach my $classifier (@{$data->{'classify'}}) {
            print STDERR join(",",@{$classifier})."\n";
        }
    }

    # Subcollection name followed by its filter
    if (defined $data->{'subcollection'}) {
        foreach my $key (keys %{$data->{'subcollection'}}) {
            print STDERR "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
        }
    }
}
# Doctype handler; read_cfg_file registers it with the parser.
# Dies (with no message) unless the declared document type name is
# CollectionConfig. Only the name argument is looked at.
sub Doctype {
    my ($parser, $doctype_name, $sysid, $pubid, $internal) = @_;

    if ($doctype_name !~ /^CollectionConfig$/) {
        die;
    }
}
491
# This Char handler replaces the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
#   $expat - the parser object; character data is accumulated in its
#            'Text' entry
#   $chars - the chunk of character data to append
# Always returns undef.
sub Char {
    my ($expat, $chars) = @_;

    if ($] < 5.008) {
        # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
        # NOTE(review): the pragma is lexically scoped to this if-block, so
        # it does not cover the append below.
        use bytes;
    }

    $expat->{'Text'} .= $chars;
    return undef;
}
[15619]502
[15600]503
504
505
506#########################################################
507
5081;
Note: See TracBrowser for help on using the repository browser.