source: main/trunk/greenstone2/perllib/collConfigxml.pm@ 26466

Last change on this file since 26466 was 26466, checked in by ak19, 11 years ago

Need to handle on/off flags which take optional values. If these flags are present, their 'value' defaults to true

  • Property svn:keywords set to Author Date Id Revision
File size: 16.8 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use XMLParser;
34
35# A mapping hash to resolve name discrepancy between gs2 and gs3.
36my $nameMap = {"key" => "value",
37 "creator" => "creator",
38 "maintainer" => "maintainer",
39 "public" => "public",
40 "infodb" => "infodbtype",
41 "defaultIndex" => "defaultindex",
42 "defaultLevel" => "defaultlevel",
43 "name" => "collectionname",
44 "description" => "collectionextra",
45 "smallicon" => "iconcollectionsmall",
46 "icon" => "iconcollection",
47 "level" => "levels",
48 "classifier" => "classify",
49 "indexSubcollection" => "indexsubcollections",
50 "indexLanguage" => "languages",
51 "defaultIndexLanguage" => "defaultlanguage",
52 "index" => "indexes",
53 "plugin" => "plugin",
54 "plugout" => "plugout",
55 "indexOption" => "indexoptions",
56 "searchType" => "searchtype",
57 "languageMetadata" => "languagemetadata",
58 "buildType" => "buildtype",
59 "orthogonalBuildTypes" => "orthogonalbuildtypes",
60 "buildOption" => "buildOption"
61 #"buildOptions" => "buildOptions", "importOptions" => "importOptions", no actual equivalents for importOptions and buildOptions in GS2
62 };
63# A hash structure which is returned by sub read_cfg_file.
64my $data = {};
65
66my $repeatedBlock = q/^(browse|pluginList)$/; #|buildOptionList)$/;
67
68# use those unique attribute values to locate the text within the elements
69# creator, public, maintainer and within a displayItem.
70my $currentLocation = "";
71my $stringexp = q/^(creator|maintainer|public|buildType)$/;
72my $displayItemNames = q/^(name|description)$/;
73
74# For storing the attributes during the StartTag subroutine, so that
75# we can use it later in Text (or EndTag) subroutines
76my $currentAttrRef = undef;
77
78my $currentLevel = "";
79
80# Count the elements with same name within the same block
81# ("plugin", "option")
82my $currentIndex = 0;
83my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage|orthogonalBuildTypes)$/; # |indexOption
84my $arrayarrayexp = q/^(plugin|classifier)$/; #|buildOption)$/;
85my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
86my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
87
88my $generalOptions = q/^(importOptions|buildOptions)$/; # becomes GS2 toplevel map of (name, value) pairs
89
90my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
91
92# Reads in the model collection configuration file, collectionConfig.xml,
93# into a structure which complies with the one used by gs2 (i.e. one read
94# in by &cfgread::read_cfg_file).
95sub read_cfg_file {
96 my ($filename) = @_;
97 $data = {};
98 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
99 return undef;
100 }
101
102 # Removed ProtocolEncoding (see MetadataXMLPlugin for details)
103
104 # create XML::Parser object for parsing metadata.xml files
105 my $parser = new XML::Parser('Style' => 'Stream',
106 'Pkg' => 'collConfigxml',
107 'Handlers' => {'Char' => \&Char,
108 'Doctype' => \&Doctype
109 });
110 if (!open (COLCFG, $filename)) {
111 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
112 } else {
113
114 $parser->parsefile ($filename);# (COLCFG);
115 close (COLCFG);
116 }
117
118 #&Display;
119 return $data;
120}
121
122sub StartTag {
123# Those marked with #@ will not be executed at the same time when this sub is being called
124# so that if/elsif is used to avoid unnecessary tests
125 my ($expat, $element) = @_;
126
127 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
128 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
129 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
130 $currentAttrRef = \%_;
131
132 my $name = $_{'name'};
133 my $value = $_{'value'};
134 my $type = $_{'type'};
135 my $orthogonal = $_{'orthogonal'};
136
137 # for subcollections
138 my $filter = $_{'filter'};
139
140 # was this just a flax thing??
141 my $assigned = $_{'assigned'};
142
143 #@ Marking repeated block
144 if ($element =~ /$repeatedBlock/) {
145 $currentIndex = 0;
146 }
147
148 #@ handling block metadataList
149 elsif (defined $name and $name =~ /$stringexp/){
150 $currentLocation = $name;
151 }
152 #@ handling default search index/level/indexLanguage and languageMetadata
153 elsif ($element =~ /$defaults/) {
154 if (defined $name and $name =~ /\w/) {
155 $data->{$nameMap->{$element}} = $name;
156 }
157 }
158
159 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
160 elsif($element eq "displayItemList") {
161 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
162 }
163 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
164 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
165 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
166 $currentLocation = $name;
167 }
168 }
169
170 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
171 elsif ($element eq "infodb") {
172 $data->{'infodbtype'} = $type;
173 }
174
175 #@ Handling indexer: mgpp/mg/lucene; stringexp
176 #@ Handling orthogonal indexers: audioDB; arrayexp
177 elsif ($element eq "search") {
178 if ((defined $orthogonal) && ($orthogonal =~ m/^(true|on|1)$/i)) {
179 push(@{$data->{'orthogonalbuildtypes'}},$type);
180 }
181 else {
182 $data->{'buildtype'} = $type;
183 }
184 }
185
186 elsif ($element eq "store_metadata_coverage")
187 {
188## print STDERR "*&*&*&*&*& HERE &*&*&*&*&*&*";
189 $data->{'store_metadata_coverage'} = $value;
190 }
191
192 #@ Handling searchtype: plain,form; arrayexp
193 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
194 #@ Handling searchtype: plain, form
195 #$currentLocation = $name;
196 #}
197
198 #@ Handle index|level|indexSubcollection|indexLanguage #|indexOption
199 elsif ($element =~ /$arrayexp/) {
200 my $key = $nameMap->{$element};
201 if (!defined $data->{$key}) {
202 $data->{$key} = [];
203 }
204
205 push (@{$data->{$key}},$name);
206 }
207
208 #*****************************************
209 elsif ($element eq "buildOption") {
210 print STDERR "**** BUILD OPTION PAIR $name $value\n";
211 $data->{$name} = $value;
212 }
213
214
215 #@ indexoptions: accentfold/casefold/stem; arrayexp
216 # needs a separate section, since unlike other $arrayexp, indexOption has <option>s as child elements
217 # but should be stored in-memory different from $generalOptions (<buildOptions> and <importOptions>)
218 elsif ($element eq "indexOption") {
219 $currentLevel = "indexOption";
220 # find the gs2 mapping name
221 my $key = $nameMap->{$currentLevel};
222 if (!defined $data->{$key}) {
223 $data->{$key} = [];
224 }
225 }
226 elsif ($currentLevel eq "indexOption" and $element eq "option") {
227 my $key = $nameMap->{$currentLevel};
228 if (defined $name and $name =~ /\w/) {
229 push (@{$data->{$key}},$name);
230 }
231 }
232
233 #@ plugout options
234 elsif ($element eq "plugout") {
235 $currentLevel = "plugout";
236 my $key = $nameMap->{$currentLevel};
237 if (!defined $data->{$key}) {
238 $data->{$key} = [];
239 }
240 if(defined $name and $name ne ""){
241 push (@{$data->{$key}},$name);
242 }
243 else{
244 push (@{$data->{$key}},"GreenstoneXMLPlugout");
245 }
246 }
247 if ($currentLevel eq "plugout" and $element eq "option") {
248 my $key = $nameMap->{$currentLevel};
249 if (defined $name and $name ne ""){
250 push (@{$data->{$key}},$name);
251 }
252 if (defined $value and $value ne ""){
253 push (@{$data->{$key}},$value);
254 }
255 }
256
257 #@ use hash of hash of strings: hashexp
258 elsif ($element =~ /$hashexp/) {
259 if (!defined $data->{$element}) {
260 $data->{$element} = {};
261 }
262 if (defined $name and $name =~ /\w/) {
263 if (defined $filter and $filter =~ /\w/) {
264 $data->{$element}->{$name} = $filter;
265
266 }
267 }
268 }
269
270 #@ Handling each classifier/plugin element
271 elsif ($element =~ /$arrayarrayexp/) {
272 # find the gs2 mapping name
273 $currentLevel = $element;
274 my $key = $nameMap->{$element};
275
276 # define an array of array of strings foreach $k (@{$data->{$key}}) {
277 if (!defined $data->{$key}) {
278 $data->{$key} = [];
279 }
280
281 # Push classifier/plugin name (e.g. AZList) into $data as the first string
282 push (@{$data->{$key}->[$currentIndex]},$name);
283 if (defined $value and $value =~ /\w/) {
284 push (@{$data->{$key}->[$currentIndex]}, $value);
285 print "$value\n";
286 }
287 #print $currentIndex."indexup\n";
288 }
289
290 #@ Handling the option elements in each classifier/plugin element (as the following strings)
291 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
292 # find the gs2 mapping name for classifier and plugin
293 my $key = $nameMap->{$currentLevel};
294
295 if (defined $name and $name =~ /\w/) {
296 push (@{$data->{$key}->[$currentIndex]}, $name);
297 }
298 if (defined $value and $value =~ /\w/) {
299 push (@{$data->{$key}->[$currentIndex]}, $value);
300 }
301
302 }
303
304 #@ Handling each importOptions/buildOptions element
305 elsif ($element =~ /$generalOptions/) {
306 $currentLevel = $element;
307 #my $key = $nameMap->{$element}; # importOptions and buildOptions map to themselves, no equivalents in GS2
308
309 # define a map of string pairs
310 if (!defined $data->{$element}) {
311 $data->{$element} = {};
312 }
313 }
314 #@ Handling the option elements in an importOptions/buildOptions element, which are of the form:
315 # <importOptions><option name="n" value="v"/><option .../></importOptions>
316 # these get stored in-memory at the top level as (n, v) pairs, just as in GS2
317 elsif ($currentLevel =~ /$generalOptions/ and $element eq "option") {
318 if (defined $name and $name =~ /\w/) {
319
320 # if a flag's value is on or 1, set it to true. If the value is off or 0, set it to false. (GS2 expects true/false)
321 # But such on/off flags (like -gli) need not be given a value, in which case the value defaults to 'true' again.
322
323 if(defined $value and $value =~ /\w/) {
324
325 if ($value =~ m/^(on|1)$/) {
326 $value = "true";
327 } elsif ($value =~ m/^(off|0)$/) {
328 $value = "false";
329 } # else, use whatever value is provided
330
331 }
332 else {
333 $value = "true";
334 }
335 $data->{$name} = $value; #$data->{$currentLevel}->{$name} = $value;
336 #print "@@@@ Added $currentLevel option: $name=$value\n";
337 }
338 }
339
340}
341
342sub EndTag {
343 my ($expat, $element) = @_;
344 my $endTags = q/^(browse|pluginList|displayItemList|indexOption|importOptions|buildOptions)$/; #|buildOptionList)$/;
345 if ($element =~ /$endTags/) {
346 $currentIndex = 0;
347 $currentLevel = "";
348 }
349
350 # $arrayarrayexp contains classifier|plugin
351 elsif($element =~ /$arrayarrayexp/ ){
352 $currentIndex = $currentIndex + 1;
353 }
354}
355
356sub Text {
357 if (defined $currentLocation) {
358 #@ Handling block metadataList(creator, maintainer, public)
359 if($currentLocation =~ /$stringexp/){
360 #print $currentLocation;
361 my $key = $nameMap->{$currentLocation};
362 $data->{$key} = $_;
363 undef $currentLocation;
364 }
365
366 #@ Handling displayItem metadata that are children of displayItemList
367 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
368 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
369 my $lang = $currentAttrRef->{'lang'};
370 my $name = $currentAttrRef->{'name'};
371
372 # this is how data->collectionmeta's language is set in Greenstone 2.
373 # Need to be consistent, since export.pl accesses these values all in the same way
374 if(!defined $lang) {
375 $lang = 'default';
376 } else {
377 $lang = "[l=$lang]";
378 }
379
380 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
381 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
382 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
383 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
384 }
385 undef $currentLocation;
386 }
387
388 #@ Handling searchtype: plain,form; arrayexp
389 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
390 # map 'searchType' into 'searchtype'
391 my $key = $nameMap->{$currentLocation};
392 # split it by ','
393 my ($plain, $form) = split (",", $_);
394
395 if (!defined $data->{$key}) {
396 $data->{$key} = [];
397 }
398 if (defined $plain and $plain =~ /\w/) {
399 push @{ $data->{$key} }, $plain;
400 }
401 if (defined $form and $form =~ /\w/) {
402 push @{ $data->{$key} }, $form;
403 }
404 }
405 }
406}
407
408# This sub is for debugging purposes
409sub Display {
410 # metadataList
411 foreach my $k (keys %{$data}) {
412 print STDERR "*** metadatalist key $k\n";
413 }
414
415 print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
416 print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
417 print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
418 print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
419 print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
420 print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
421 print STDERR "*** orthogonal build types: ".join(",",$data->{"orthogonalbuildtypes"})."\n" if (defined $data->{"orthogonalbuildtypes"});
422 print STDERR "*** search types: \n";
423 print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
424 print STDERR "*** levels: \n";
425 print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
426 print STDERR "*** index subcollections: \n";
427 print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
428 print STDERR "*** indexes: \n";
429 print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
430 print STDERR "*** index options: \n";
431 print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
432 print STDERR "*** languages: \n";
433 print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
434 print STDERR "*** language metadata: \n";
435 print STDERR join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
436
437 print STDERR "*** Plugins: \n";
438 if (defined $data->{'plugin'}) {
439 foreach $a (@{$data->{'plugin'}}) {
440 print join(",",@$a);
441 print "\n";
442 }
443 }
444
445 #print STDERR "*** Build options: \n";
446 #if (defined $data->{'store_metadata_coverage'}) {
447 #foreach $a (@{$data->{'store_metadata_coverage'}}) {
448 # print join(",",@$a,@$_);
449 # print "\n";
450 #}
451 #}
452
453 if (defined $data->{'classify'}) {
454 print STDERR "*** Classifiers: \n";
455 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
456 }
457
458 if (defined $data->{'subcollection'}) {
459 foreach my $key (keys %{$data->{'subcollection'}}) {
460 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
461 }
462 }
463}
464# is this actually used??
465sub Doctype {
466 my ($expat, $name, $sysid, $pubid, $internal) = @_;
467
468 die if ($name !~ /^CollectionConfig$/);
469}
470
471# This Char function overrides the one in XML::Parser::Stream to overcome a
472# problem where $expat->{Text} is treated as the return value, slowing
473# things down significantly in some cases.
474sub Char {
475 if ($]<5.008) {
476 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
477 }
478 $_[0]->{'Text'} .= $_[1];
479 return undef;
480}
481
482
483
484
485#########################################################
486
4871;
Note: See TracBrowser for help on using the repository browser.