source: main/trunk/greenstone2/perllib/collConfigxml.pm@ 26451

Last change on this file since 26451 was 26451, checked in by ak19, 11 years ago
  1. Fixed processing of the collectionconfig's indexOption element. 2. Correct set of changes for processing the new toplevel importOptions and buildOptions elements of collectionConfig.xml (which can contain such options as specify OIDtype, OIDmetadata, verbosity). 3. Undoing previous commits, since the importOptions and buildOptions elememts are not nested inside plugins but are one of the toplevel elements of collectionConfig.xml. And do not need the recently-committed changes to inexport.pm either, since any command line args for import and buildOptions will override what's in collectionConfig.xml anyway.
  • Property svn:keywords set to Author Date Id Revision
File size: 16.4 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use XMLParser;
34
35# A mapping hash to resolve name discrepancy between gs2 and gs3.
36my $nameMap = {"key" => "value",
37 "creator" => "creator",
38 "maintainer" => "maintainer",
39 "public" => "public",
40 "infodb" => "infodbtype",
41 "defaultIndex" => "defaultindex",
42 "defaultLevel" => "defaultlevel",
43 "name" => "collectionname",
44 "description" => "collectionextra",
45 "smallicon" => "iconcollectionsmall",
46 "icon" => "iconcollection",
47 "level" => "levels",
48 "classifier" => "classify",
49 "indexSubcollection" => "indexsubcollections",
50 "indexLanguage" => "languages",
51 "defaultIndexLanguage" => "defaultlanguage",
52 "index" => "indexes",
53 "plugin" => "plugin",
54 "plugout" => "plugout",
55 "indexOption" => "indexoptions",
56 "searchType" => "searchtype",
57 "languageMetadata" => "languagemetadata",
58 "buildType" => "buildtype",
59 "orthogonalBuildTypes" => "orthogonalbuildtypes",
60 "buildOption" => "buildOption"
61 #"buildOptions" => "buildOptions", "importOptions" => "importOptions", no actual equivalents for importOptions and buildOptions in GS2
62 };
63# A hash structure which is returned by sub read_cfg_file.
64my $data = {};
65
66my $repeatedBlock = q/^(browse|pluginList)$/; #|buildOptionList)$/;
67
68# use those unique attribute values to locate the text within the elements
69# creator, public, maintainer and within a displayItem.
70my $currentLocation = "";
71my $stringexp = q/^(creator|maintainer|public|buildType)$/;
72my $displayItemNames = q/^(name|description)$/;
73
74# For storing the attributes during the StartTag subroutine, so that
75# we can use it later in Text (or EndTag) subroutines
76my $currentAttrRef = undef;
77
78my $currentLevel = "";
79
80# Count the elements with same name within the same block
81# ("plugin", "option")
82my $currentIndex = 0;
83my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage|orthogonalBuildTypes)$/; # |indexOption
84my $arrayarrayexp = q/^(plugin|classifier)$/; #|buildOption)$/;
85my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
86my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
87
88my $generalOptions = q/^(importOptions|buildOptions)$/; # becomes GS2 toplevel map of (name, value) pairs
89
90my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
91
92# Reads in the model collection configuration file, collectionConfig.xml,
93# into a structure which complies with the one used by gs2 (i.e. one read
94# in by &cfgread::read_cfg_file).
95sub read_cfg_file {
96 my ($filename) = @_;
97 $data = {};
98 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
99 return undef;
100 }
101
102 # Removed ProtocolEncoding (see MetadataXMLPlugin for details)
103
104 # create XML::Parser object for parsing metadata.xml files
105 my $parser = new XML::Parser('Style' => 'Stream',
106 'Pkg' => 'collConfigxml',
107 'Handlers' => {'Char' => \&Char,
108 'Doctype' => \&Doctype
109 });
110 if (!open (COLCFG, $filename)) {
111 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
112 } else {
113
114 $parser->parsefile ($filename);# (COLCFG);
115 close (COLCFG);
116 }
117
118 #&Display;
119 return $data;
120}
121
122sub StartTag {
123# Those marked with #@ will not be executed at the same time when this sub is being called
124# so that if/elsif is used to avoid unnecessary tests
125 my ($expat, $element) = @_;
126
127 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
128 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
129 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
130 $currentAttrRef = \%_;
131
132 my $name = $_{'name'};
133 my $value = $_{'value'};
134 my $type = $_{'type'};
135 my $orthogonal = $_{'orthogonal'};
136
137 # for subcollections
138 my $filter = $_{'filter'};
139
140 # was this just a flax thing??
141 my $assigned = $_{'assigned'};
142
143 #@ Marking repeated block
144 if ($element =~ /$repeatedBlock/) {
145 $currentIndex = 0;
146 }
147
148 #@ handling block metadataList
149 elsif (defined $name and $name =~ /$stringexp/){
150 $currentLocation = $name;
151 }
152 #@ handling default search index/level/indexLanguage and languageMetadata
153 elsif ($element =~ /$defaults/) {
154 if (defined $name and $name =~ /\w/) {
155 $data->{$nameMap->{$element}} = $name;
156 }
157 }
158
159 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
160 elsif($element eq "displayItemList") {
161 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
162 }
163 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
164 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
165 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
166 $currentLocation = $name;
167 }
168 }
169
170 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
171 elsif ($element eq "infodb") {
172 $data->{'infodbtype'} = $type;
173 }
174
175 #@ Handling indexer: mgpp/mg/lucene; stringexp
176 #@ Handling orthogonal indexers: audioDB; arrayexp
177 elsif ($element eq "search") {
178 if ((defined $orthogonal) && ($orthogonal =~ m/^(true|on|1)$/i)) {
179 push(@{$data->{'orthogonalbuildtypes'}},$type);
180 }
181 else {
182 $data->{'buildtype'} = $type;
183 }
184 }
185
186 elsif ($element eq "store_metadata_coverage")
187 {
188## print STDERR "*&*&*&*&*& HERE &*&*&*&*&*&*";
189 $data->{'store_metadata_coverage'} = $value;
190 }
191
192 #@ Handling searchtype: plain,form; arrayexp
193 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
194 #@ Handling searchtype: plain, form
195 #$currentLocation = $name;
196 #}
197
198 #@ Handle index|level|indexSubcollection|indexLanguage #|indexOption
199 elsif ($element =~ /$arrayexp/) {
200 my $key = $nameMap->{$element};
201 if (!defined $data->{$key}) {
202 $data->{$key} = [];
203 }
204
205 push (@{$data->{$key}},$name);
206 }
207
208 #*****************************************
209 elsif ($element eq "buildOption") {
210 print STDERR "**** BUILD OPTION PAIR $name $value\n";
211 $data->{$name} = $value;
212 }
213
214
215 #@ indexoptions: accentfold/casefold/stem; arrayexp
216 # needs a separate section, since unlike other $arrayexp, indexOption has <option>s as child elements
217 # but should be stored in-memory different from $generalOptions (<buildOptions> and <importOptions>)
218 elsif ($element eq "indexOption") {
219 $currentLevel = "indexOption";
220 # find the gs2 mapping name
221 my $key = $nameMap->{$currentLevel};
222 if (!defined $data->{$key}) {
223 $data->{$key} = [];
224 }
225 }
226 elsif ($currentLevel eq "indexOption" and $element eq "option") {
227 my $key = $nameMap->{$currentLevel};
228 if (defined $name and $name =~ /\w/) {
229 push (@{$data->{$key}},$name);
230 }
231 }
232
233 #@ plugout options
234 elsif ($element eq "plugout") {
235 $currentLevel = "plugout";
236 my $key = $nameMap->{$currentLevel};
237 if (!defined $data->{$key}) {
238 $data->{$key} = [];
239 }
240 if(defined $name and $name ne ""){
241 push (@{$data->{$key}},$name);
242 }
243 else{
244 push (@{$data->{$key}},"GreenstoneXMLPlugout");
245 }
246 }
247 if ($currentLevel eq "plugout" and $element eq "option") {
248 my $key = $nameMap->{$currentLevel};
249 if (defined $name and $name ne ""){
250 push (@{$data->{$key}},$name);
251 }
252 if (defined $value and $value ne ""){
253 push (@{$data->{$key}},$value);
254 }
255 }
256
257 #@ use hash of hash of strings: hashexp
258 elsif ($element =~ /$hashexp/) {
259 if (!defined $data->{$element}) {
260 $data->{$element} = {};
261 }
262 if (defined $name and $name =~ /\w/) {
263 if (defined $filter and $filter =~ /\w/) {
264 $data->{$element}->{$name} = $filter;
265
266 }
267 }
268 }
269
270 #@ Handling each classifier/plugin element
271 elsif ($element =~ /$arrayarrayexp/) {
272 # find the gs2 mapping name
273 $currentLevel = $element;
274 my $key = $nameMap->{$element};
275
276 # define an array of array of strings foreach $k (@{$data->{$key}}) {
277 if (!defined $data->{$key}) {
278 $data->{$key} = [];
279 }
280
281 # Push classifier/plugin name (e.g. AZList) into $data as the first string
282 push (@{$data->{$key}->[$currentIndex]},$name);
283 if (defined $value and $value =~ /\w/) {
284 push (@{$data->{$key}->[$currentIndex]}, $value);
285 print "$value\n";
286 }
287 #print $currentIndex."indexup\n";
288 }
289
290 #@ Handling the option elements in each classifier/plugin element (as the following strings)
291 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
292 # find the gs2 mapping name for classifier and plugin
293 my $key = $nameMap->{$currentLevel};
294
295 if (defined $name and $name =~ /\w/) {
296 push (@{$data->{$key}->[$currentIndex]}, $name);
297 }
298 if (defined $value and $value =~ /\w/) {
299 push (@{$data->{$key}->[$currentIndex]}, $value);
300 }
301
302 }
303
304 #@ Handling each importOptions/buildOptions element
305 elsif ($element =~ /$generalOptions/) {
306 $currentLevel = $element;
307 #my $key = $nameMap->{$element}; # importOptions and buildOptions map to themselves, no equivalents in GS2
308
309 # define a map of string pairs
310 if (!defined $data->{$element}) {
311 $data->{$element} = {};
312 }
313 }
314 #@ Handling the option elements in an importOptions/buildOptions element, which are of the form:
315 # <importOptions><option name="n" value="v"/><option .../></importOptions>
316 # these get stored in memory at the top level as (n, v) pairs, as in GS2
317 elsif ($currentLevel =~ /$generalOptions/ and $element eq "option") {
318 if (defined $name and $name =~ /\w/ and defined $value and $value =~ /\w/) {
319 #$data->{$currentLevel}->{$name} = $value;
320 $data->{$name} = $value;
321 print "@@@@ Added $currentLevel option: $name=$value\n";
322 }
323 }
324
325}
326
327sub EndTag {
328 my ($expat, $element) = @_;
329 my $endTags = q/^(browse|pluginList|displayItemList|indexOption|importOptions|buildOptions)$/; #|buildOptionList)$/;
330 if ($element =~ /$endTags/) {
331 $currentIndex = 0;
332 $currentLevel = "";
333 }
334
335 # $arrayarrayexp contains classifier|plugin
336 elsif($element =~ /$arrayarrayexp/ ){
337 $currentIndex = $currentIndex + 1;
338 }
339}
340
341sub Text {
342 if (defined $currentLocation) {
343 #@ Handling block metadataList(creator, maintainer, public)
344 if($currentLocation =~ /$stringexp/){
345 #print $currentLocation;
346 my $key = $nameMap->{$currentLocation};
347 $data->{$key} = $_;
348 undef $currentLocation;
349 }
350
351 #@ Handling displayItem metadata that are children of displayItemList
352 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
353 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
354 my $lang = $currentAttrRef->{'lang'};
355 my $name = $currentAttrRef->{'name'};
356
357 # this is how data->collectionmeta's language is set in Greenstone 2.
358 # Need to be consistent, since export.pl accesses these values all in the same way
359 if(!defined $lang) {
360 $lang = 'default';
361 } else {
362 $lang = "[l=$lang]";
363 }
364
365 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
366 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
367 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
368 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
369 }
370 undef $currentLocation;
371 }
372
373 #@ Handling searchtype: plain,form; arrayexp
374 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
375 # map 'searchType' into 'searchtype'
376 my $key = $nameMap->{$currentLocation};
377 # split it by ','
378 my ($plain, $form) = split (",", $_);
379
380 if (!defined $data->{$key}) {
381 $data->{$key} = [];
382 }
383 if (defined $plain and $plain =~ /\w/) {
384 push @{ $data->{$key} }, $plain;
385 }
386 if (defined $form and $form =~ /\w/) {
387 push @{ $data->{$key} }, $form;
388 }
389 }
390 }
391}
392
393# This sub is for debugging purposes
394sub Display {
395 # metadataList
396 foreach my $k (keys %{$data}) {
397 print STDERR "*** metadatalist key $k\n";
398 }
399
400 print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
401 print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
402 print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
403 print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
404 print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
405 print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
406 print STDERR "*** orthogonal build types: ".join(",",$data->{"orthogonalbuildtypes"})."\n" if (defined $data->{"orthogonalbuildtypes"});
407 print STDERR "*** search types: \n";
408 print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
409 print STDERR "*** levels: \n";
410 print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
411 print STDERR "*** index subcollections: \n";
412 print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
413 print STDERR "*** indexes: \n";
414 print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
415 print STDERR "*** index options: \n";
416 print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
417 print STDERR "*** languages: \n";
418 print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
419 print STDERR "*** language metadata: \n";
420 print STDERR join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
421
422 print STDERR "*** Plugins: \n";
423 if (defined $data->{'plugin'}) {
424 foreach $a (@{$data->{'plugin'}}) {
425 print join(",",@$a);
426 print "\n";
427 }
428 }
429
430 #print STDERR "*** Build options: \n";
431 #if (defined $data->{'store_metadata_coverage'}) {
432 #foreach $a (@{$data->{'store_metadata_coverage'}}) {
433 # print join(",",@$a,@$_);
434 # print "\n";
435 #}
436 #}
437
438 if (defined $data->{'classify'}) {
439 print STDERR "*** Classifiers: \n";
440 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
441 }
442
443 if (defined $data->{'subcollection'}) {
444 foreach my $key (keys %{$data->{'subcollection'}}) {
445 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
446 }
447 }
448}
449# is this actually used??
450sub Doctype {
451 my ($expat, $name, $sysid, $pubid, $internal) = @_;
452
453 die if ($name !~ /^CollectionConfig$/);
454}
455
456# This Char function overrides the one in XML::Parser::Stream to overcome a
457# problem where $expat->{Text} is treated as the return value, slowing
458# things down significantly in some cases.
459sub Char {
460 if ($]<5.008) {
461 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
462 }
463 $_[0]->{'Text'} .= $_[1];
464 return undef;
465}
466
467
468
469
470#########################################################
471
4721;
Note: See TracBrowser for help on using the repository browser.