source: main/trunk/greenstone2/perllib/collConfigxml.pm@ 27192

Last change on this file since 27192 was 27192, checked in by davidb, 11 years ago

Extra test added to avoid putting 'undef' into an array of values. Problem originally showed up with 'indexoptions'

  • Property svn:keywords set to Author Date Id Revision
File size: 16.1 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use XMLParser;
34
35# A mapping hash to resolve name discrepancy between gs2 and gs3.
36my $nameMap = {"key" => "value",
37 "creator" => "creator",
38 "maintainer" => "maintainer",
39 "public" => "public",
40 "infodb" => "infodbtype",
41 "defaultIndex" => "defaultindex",
42 "defaultLevel" => "defaultlevel",
43 "name" => "collectionname",
44 "description" => "collectionextra",
45 "smallicon" => "iconcollectionsmall",
46 "icon" => "iconcollection",
47 "level" => "levels",
48 "classifier" => "classify",
49 "indexSubcollection" => "indexsubcollections",
50 "indexLanguage" => "languages",
51 "defaultIndexLanguage" => "defaultlanguage",
52 "index" => "indexes",
53 "plugin" => "plugin",
54 "plugout" => "plugout",
55 "indexOption" => "indexoptions",
56 "searchType" => "searchtype",
57 "languageMetadata" => "languagemetadata",
58 "buildType" => "buildtype",
59 "orthogonalBuildTypes" => "orthogonalbuildtypes",
60 "buildOption" => "buildOption"
61 #"buildOptions" => "buildOptions", "importOptions" => "importOptions", no actual equivalents for importOptions and buildOptions in GS2
62 };
63# A hash structure which is returned by sub read_cfg_file.
64my $data = {};
65
66my $repeatedBlock = q/^(browse|pluginList)$/; #|buildOptionList)$/;
67
68# use those unique attribute values to locate the text within the elements
69# creator, public, maintainer and within a displayItem.
70my $currentLocation = "";
71my $stringexp = q/^(creator|maintainer|public|buildType)$/;
72my $displayItemNames = q/^(name|description)$/;
73
74# For storing the attributes during the StartTag subroutine, so that
75# we can use it later in Text (or EndTag) subroutines
76my $currentAttrRef = undef;
77
78my $currentLevel = "";
79
80# Count the elements with same name within the same block
81# ("plugin", "option")
82my $currentIndex = 0;
83my $arrayexp = q/^(index|level|indexOption|indexSubcollection|indexLanguage|orthogonalBuildTypes)$/;
84my $arrayarrayexp = q/^(plugin|classifier)$/; #|buildOption)$/;
85my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
86my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
87
88my $generalOptions = q/^(importOptions|buildOptions)$/; # becomes GS2 toplevel map of (name, value) pairs
89
90my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
91
92# Reads in the model collection configuration file, collectionConfig.xml,
93# into a structure which complies with the one used by gs2 (i.e. one read
94# in by &cfgread::read_cfg_file).
95sub read_cfg_file {
96 my ($filename) = @_;
97 $data = {};
98 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
99 return undef;
100 }
101
102 # Removed ProtocolEncoding (see MetadataXMLPlugin for details)
103
104 # create XML::Parser object for parsing metadata.xml files
105 my $parser = new XML::Parser('Style' => 'Stream',
106 'Pkg' => 'collConfigxml',
107 'Handlers' => {'Char' => \&Char,
108 'Doctype' => \&Doctype
109 });
110 if (!open (COLCFG, $filename)) {
111 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
112 } else {
113
114 $parser->parsefile ($filename);# (COLCFG);
115 close (COLCFG);
116 }
117
118 #&Display;
119 return $data;
120}
121
122sub StartTag {
123# Those marked with #@ will not be executed at the same time when this sub is being called
124# so that if/elsif is used to avoid unnecessary tests
125 my ($expat, $element) = @_;
126
127 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
128 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
129 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
130 $currentAttrRef = \%_;
131
132 my $name = $_{'name'};
133 my $value = $_{'value'};
134 my $type = $_{'type'};
135 my $orthogonal = $_{'orthogonal'};
136
137 # for subcollections
138 my $filter = $_{'filter'};
139
140 # was this just a flax thing??
141 my $assigned = $_{'assigned'};
142
143 #@ Marking repeated block
144 if ($element =~ /$repeatedBlock/) {
145 $currentIndex = 0;
146 }
147
148 #@ handling block metadataList
149 elsif (defined $name and $name =~ /$stringexp/){
150 $currentLocation = $name;
151 }
152 #@ handling default search index/level/indexLanguage and languageMetadata
153 elsif ($element =~ /$defaults/) {
154 if (defined $name and $name =~ /\w/) {
155 $data->{$nameMap->{$element}} = $name;
156 }
157 }
158
159 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
160 elsif($element eq "displayItemList") {
161 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
162 }
163 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
164 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
165 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
166 $currentLocation = $name;
167 }
168 }
169
170 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
171 elsif ($element eq "infodb") {
172 $data->{'infodbtype'} = $type;
173 }
174
175 #@ Handling indexer: mgpp/mg/lucene; stringexp
176 #@ Handling orthogonal indexers: audioDB; arrayexp
177 elsif ($element eq "search") {
178 if ((defined $orthogonal) && ($orthogonal =~ m/^(true|on|1)$/i)) {
179 push(@{$data->{'orthogonalbuildtypes'}},$type);
180 }
181 else {
182 $data->{'buildtype'} = $type;
183 }
184 }
185
186 elsif ($element eq "store_metadata_coverage")
187 {
188## print STDERR "*&*&*&*&*& HERE &*&*&*&*&*&*";
189 $data->{'store_metadata_coverage'} = $value;
190 }
191
192 #@ Handling searchtype: plain,form; arrayexp
193 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
194 #@ Handling searchtype: plain, form
195 #$currentLocation = $name;
196 #}
197
198 #@ Handle index|level|indexOption|indexSubcollection|indexLanguage
199 elsif ($element =~ /$arrayexp/) {
200 my $key = $nameMap->{$element};
201 if (!defined $data->{$key}) {
202 $data->{$key} = [];
203 }
204
205 if (defined $name) {
206 push (@{$data->{$key}},$name);
207 }
208 }
209
210 #*****************************************
211 elsif ($element eq "buildOption") {
212 print STDERR "**** BUILD OPTION PAIR $name $value\n";
213 $data->{$name} = $value;
214 }
215
216 #@ plugout options
217 elsif ($element eq "plugout") {
218 $currentLevel = "plugout";
219 my $key = $nameMap->{$currentLevel};
220 if (!defined $data->{$key}) {
221 $data->{$key} = [];
222 }
223 if(defined $name and $name ne ""){
224 push (@{$data->{$key}},$name);
225 }
226 else{
227 push (@{$data->{$key}},"GreenstoneXMLPlugout");
228 }
229 }
230 if ($currentLevel eq "plugout" and $element eq "option") {
231 my $key = $nameMap->{$currentLevel};
232 if (defined $name and $name ne ""){
233 push (@{$data->{$key}},$name);
234 }
235 if (defined $value and $value ne ""){
236 push (@{$data->{$key}},$value);
237 }
238 }
239
240 #@ use hash of hash of strings: hashexp
241 elsif ($element =~ /$hashexp/) {
242 if (!defined $data->{$element}) {
243 $data->{$element} = {};
244 }
245 if (defined $name and $name =~ /\w/) {
246 if (defined $filter and $filter =~ /\w/) {
247 $data->{$element}->{$name} = $filter;
248
249 }
250 }
251 }
252
253 #@ Handling each classifier/plugin element
254 elsif ($element =~ /$arrayarrayexp/) {
255 # find the gs2 mapping name
256 $currentLevel = $element;
257 my $key = $nameMap->{$element};
258
259 # define an array of array of strings foreach $k (@{$data->{$key}}) {
260 if (!defined $data->{$key}) {
261 $data->{$key} = [];
262 }
263
264 # Push classifier/plugin name (e.g. AZList) into $data as the first string
265 push (@{$data->{$key}->[$currentIndex]},$name);
266 if (defined $value and $value =~ /\w/) {
267 push (@{$data->{$key}->[$currentIndex]}, $value);
268 print "$value\n";
269 }
270 #print $currentIndex."indexup\n";
271 }
272
273 #@ Handling the option elements in each classifier/plugin element (as the following strings)
274 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
275 # find the gs2 mapping name for classifier and plugin
276 my $key = $nameMap->{$currentLevel};
277
278 if (defined $name and $name =~ /\w/) {
279 push (@{$data->{$key}->[$currentIndex]}, $name);
280 }
281 if (defined $value and $value =~ /\w/) {
282 push (@{$data->{$key}->[$currentIndex]}, $value);
283 }
284
285 }
286
287 #@ Handling each importOptions/buildOptions element
288 elsif ($element =~ /$generalOptions/) {
289 $currentLevel = $element;
290 #my $key = $nameMap->{$element}; # importOptions and buildOptions map to themselves, no equivalents in GS2
291
292 # define a map of string pairs
293 if (!defined $data->{$element}) {
294 $data->{$element} = {};
295 }
296 }
297 #@ Handling the option elements in an importOptions/buildOptions element, which are of the form:
298 # <importOptions><option name="n" value="v"/><option .../></importOptions>
299 # these get stored in-memory at the top level as (n, v) pairs, just as in GS2
300 elsif ($currentLevel =~ /$generalOptions/ and $element eq "option") {
301 if (defined $name and $name =~ /\w/) {
302
303 # if a flag's value is on or 1, set it to true. If the value is off or 0, set it to false. (GS2 expects true/false)
304 # But such on/off flags (like -gli) need not be given a value, in which case the value defaults to 'true' again.
305
306 if(defined $value and $value =~ /\w/) {
307
308 if ($value =~ m/^(on|1)$/) {
309 $value = "true";
310 } elsif ($value =~ m/^(off|0)$/) {
311 $value = "false";
312 } # else, use whatever value is provided
313
314 }
315 else {
316 $value = "true";
317 }
318 $data->{$name} = $value; #$data->{$currentLevel}->{$name} = $value;
319 #print "@@@@ Added $currentLevel option: $name=$value\n";
320 }
321 }
322
323}
324
325sub EndTag {
326 my ($expat, $element) = @_;
327 my $endTags = q/^(browse|pluginList|displayItemList|indexOption|importOptions|buildOptions)$/; #|buildOptionList)$/;
328 if ($element =~ /$endTags/) {
329 $currentIndex = 0;
330 $currentLevel = "";
331 }
332
333 # $arrayarrayexp contains classifier|plugin
334 elsif($element =~ /$arrayarrayexp/ ){
335 $currentIndex = $currentIndex + 1;
336 }
337}
338
339sub Text {
340 if (defined $currentLocation) {
341 #@ Handling block metadataList(creator, maintainer, public)
342 if($currentLocation =~ /$stringexp/){
343 #print $currentLocation;
344 my $key = $nameMap->{$currentLocation};
345 $data->{$key} = $_;
346 undef $currentLocation;
347 }
348
349 #@ Handling displayItem metadata that are children of displayItemList
350 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
351 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
352 my $lang = $currentAttrRef->{'lang'};
353 my $name = $currentAttrRef->{'name'};
354
355 # this is how data->collectionmeta's language is set in Greenstone 2.
356 # Need to be consistent, since export.pl accesses these values all in the same way
357 if(!defined $lang) {
358 $lang = 'default';
359 } else {
360 $lang = "[l=$lang]";
361 }
362
363 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
364 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
365 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
366 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
367 }
368 undef $currentLocation;
369 }
370
371 #@ Handling searchtype: plain,form; arrayexp
372 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
373 # map 'searchType' into 'searchtype'
374 my $key = $nameMap->{$currentLocation};
375 # split it by ','
376 my ($plain, $form) = split (",", $_);
377
378 if (!defined $data->{$key}) {
379 $data->{$key} = [];
380 }
381 if (defined $plain and $plain =~ /\w/) {
382 push @{ $data->{$key} }, $plain;
383 }
384 if (defined $form and $form =~ /\w/) {
385 push @{ $data->{$key} }, $form;
386 }
387 }
388 }
389}
390
391# This sub is for debugging purposes
392sub Display {
393 # metadataList
394 foreach my $k (keys %{$data}) {
395 print STDERR "*** metadatalist key $k\n";
396 }
397
398 print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
399 print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
400 print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
401 print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
402 print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
403 print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
404 print STDERR "*** orthogonal build types: ".join(",",$data->{"orthogonalbuildtypes"})."\n" if (defined $data->{"orthogonalbuildtypes"});
405 print STDERR "*** search types: \n";
406 print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
407 print STDERR "*** levels: \n";
408 print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
409 print STDERR "*** index subcollections: \n";
410 print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
411 print STDERR "*** indexes: \n";
412 print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
413 print STDERR "*** index options: \n";
414 print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
415 print STDERR "*** languages: \n";
416 print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
417 print STDERR "*** language metadata: \n";
418 print STDERR join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
419
420 print STDERR "*** Plugins: \n";
421 if (defined $data->{'plugin'}) {
422 foreach $a (@{$data->{'plugin'}}) {
423 print join(",",@$a);
424 print "\n";
425 }
426 }
427
428 #print STDERR "*** Build options: \n";
429 #if (defined $data->{'store_metadata_coverage'}) {
430 #foreach $a (@{$data->{'store_metadata_coverage'}}) {
431 # print join(",",@$a,@$_);
432 # print "\n";
433 #}
434 #}
435
436 if (defined $data->{'classify'}) {
437 print STDERR "*** Classifiers: \n";
438 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
439 }
440
441 if (defined $data->{'subcollection'}) {
442 foreach my $key (keys %{$data->{'subcollection'}}) {
443 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
444 }
445 }
446}
447# is this actually used??
448sub Doctype {
449 my ($expat, $name, $sysid, $pubid, $internal) = @_;
450
451 die if ($name !~ /^CollectionConfig$/);
452}
453
454# This Char function overrides the one in XML::Parser::Stream to overcome a
455# problem where $expat->{Text} is treated as the return value, slowing
456# things down significantly in some cases.
457sub Char {
458 if ($]<5.008) {
459 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
460 }
461 $_[0]->{'Text'} .= $_[1];
462 return undef;
463}
464
465
466
467
468#########################################################
469
4701;
Note: See TracBrowser for help on using the repository browser.