source: main/trunk/greenstone2/perllib/collConfigxml.pm@ 22485

Last change on this file since 22485 was 22485, checked in by ak19, 14 years ago
  1. Dr Bainbridge fixed the database perl modules to all have the method read_info_keys (which reads the keys from the database into a map), so that dbutil.pm can have the same as a generic method. 2. buildConfigxml.pm only writes out the defaultIndex if it is set (to prevent an Uninitialised Variable warning message from Perl).
  • Property svn:keywords set to Author Date Id Revision
File size: 15.2 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33# Wrapper that ensures the right version of XML::Parser is loaded given
34# the version of Perl being used. Need to distinguish between Perl 5.6 and
35# Perl 5.8
36sub BEGIN {
37 my $perl_dir;
38
39 # Note: $] encodes the version number of perl
40 if ($]>=5.010) {
41 $perl_dir = "perl-5.10";
42 }
43 elsif ($]>5.008) {
44 # perl 5.8.1 or above
45 $perl_dir = "perl-5.8";
46 }
47 elsif ($]<5.008) {
48 # assume perl 5.6
49 $perl_dir = "perl-5.6";
50 }
51 else {
52 print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
53 print STDERR " Please upgrade to a newer version of Perl.\n";
54 $perl_dir = "perl-5.8";
55 }
56
57 if ($ENV{'GSDLOS'} !~ /^windows$/i) {
58 # Use push to put this on the end, so an existing XML::Parser will be used by default
59 push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
60 }
61}
62
63use XML::Parser;
64
65# A mapping hash to resolve name discrepancy between gs2 and gs3.
66my $nameMap = {"key" => "value",
67 "creator" => "creator",
68 "maintainer" => "maintainer",
69 "public" => "public",
70 "infodb" => "infodbtype",
71 "defaultIndex" => "defaultindex",
72 "defaultLevel" => "defaultlevel",
73 "name" => "collectionname",
74 "description" => "collectionextra",
75 "smallicon" => "iconcollectionsmall",
76 "icon" => "iconcollection",
77 "level" => "levels",
78 "classifier" => "classify",
79 "indexSubcollection" => "indexsubcollections",
80 "indexLanguage" => "languages",
81 "defaultIndexLanguage" => "defaultlanguage",
82 "index" => "indexes",
83 "plugin" => "plugin",
84 "plugout" => "plugout",
85 "indexOption" => "indexoptions",
86 "searchType" => "searchtype",
87 "languageMetadata" => "languagemetadata",
88 "buildType" => "buildtype",
89 "buildOption" => "buildOption"
90 };
91# A hash structure which is returned by sub read_cfg_file.
92my $data = {};
93
94my $repeatedBlock = q/^(browse|pluginList)$/; #|buildOptionList)$/;
95
96# use those unique attribute values to locate the text within the elements
97# creator, public, maintainer and within a displayItem.
98my $currentLocation = "";
99my $stringexp = q/^(creator|maintainer|public|buildType)$/;
100my $displayItemNames = q/^(name|description)$/;
101
102# For storing the attributes during the StartTag subroutine, so that
103# we can use it later in Text (or EndTag) subroutines
104my $currentAttrRef = undef;
105
106my $currentLevel = "";
107
108# Count the elements with same name within the same block
109# ("plugin", "option")
110my $currentIndex = 0;
111my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
112my $arrayarrayexp= q/^(plugin|classifier)$/; #|buildOption)$/;
113my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
114my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
115
116my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
117
118# Reads in the model collection configuration file, collectionConfig.xml,
119# into a structure which complies with the one used by gs2 (i.e. one read
120# in by &cfgread::read_cfg_file).
121sub read_cfg_file {
122 my ($filename) = @_;
123 $data = {};
124 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
125 return undef;
126 }
127
128 # create XML::Parser object for parsing metadata.xml files
129 my $parser;
130 if ($]<5.008) {
131 # Perl 5.6
132 $parser = new XML::Parser('Style' => 'Stream',
133 'Handlers' => {'Char' => \&Char,
134 'Doctype' => \&Doctype
135 });
136 }
137 else {
138 # Perl 5.8
139 $parser = new XML::Parser('Style' => 'Stream',
140 'ProtocolEncoding' => 'ISO-8859-1',
141 'Handlers' => {'Char' => \&Char,
142 'Doctype' => \&Doctype
143 });
144 }
145
146 if (!open (COLCFG, $filename)) {
147 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
148 } else {
149
150 $parser->parsefile ($filename);# (COLCFG);
151 close (COLCFG);
152 }
153
154 #&Display;
155 return $data;
156}
157
158sub StartTag {
159# Those marked with #@ will not be executed at the same time when this sub is being called
160# so that if/elsif is used to avoid unnecessary tests
161 my ($expat, $element) = @_;
162
163 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
164 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
165 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
166 $currentAttrRef = \%_;
167
168 my $name = $_{'name'};
169 my $value = $_{'value'};
170 my $type = $_{'type'};
171
172 # for subcollections
173 my $filter = $_{'filter'};
174
175 # was this just a flax thing??
176 my $assigned = $_{'assigned'};
177
178 #@ Marking repeated block
179 if ($element =~ /$repeatedBlock/) {
180 $currentIndex = 0;
181 }
182
183 #@ handling block metadataList
184 elsif (defined $name and $name =~ /$stringexp/){
185 $currentLocation = $name;
186 }
187 #@ handling default search index/level/indexLanguage and languageMetadata
188 elsif ($element =~ /$defaults/) {
189 if (defined $name and $name =~ /\w/) {
190 $data->{$nameMap->{$element}} = $name;
191 }
192 }
193
194 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
195 elsif($element eq "displayItemList") {
196 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
197 }
198 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
199 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
200 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
201 $currentLocation = $name;
202 }
203 }
204
205 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
206 elsif ($element eq "infodb") {
207 $data->{'infodbtype'} = $type;
208 }
209
210 #@ Handling indexer: mgpp/mg/lucene; stringexp
211 elsif ($element eq "search") {
212 $data->{'buildtype'} = $type;
213 }
214
215 #@ Handling searchtype: plain,form; arrayexp
216 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
217 #@ Handling searchtype: plain, form
218 #$currentLocation = $name;
219 #}
220
221 #@ Handle index|level|indexSubcollection|indexLanguage
222 elsif ($element =~ /$arrayexp/) {
223 my $key = $nameMap->{$element};
224 if (!defined $data->{$key}) {
225 $data->{$key} = [];
226 }
227
228 push (@{$data->{$key}},$name);
229 }
230
231 #*****************************************
232 elsif ($element eq "buildOption") {
233 print STDERR "**** BUILD OPTION PAIR $name $value\n";
234 $data->{$name} = $value;
235 }
236
237
238 #@ indexoptions: accentfold/casefold/stem; arrayexp
239 elsif ($element eq "indexOption") {
240 $currentLevel = "indexOption";
241 }
242 if ($currentLevel eq "indexOption" and $element eq "option") {
243 my $key = $nameMap->{$currentLevel};
244 if (!defined $data->{$key}) {
245 $data->{$key} = [];
246 }
247 push (@{$data->{$key}},$name);
248 }
249 #@ plugout options
250 elsif ($element eq "plugout") {
251 $currentLevel = "plugout";
252 my $key = $nameMap->{$currentLevel};
253 if (!defined $data->{$key}) {
254 $data->{$key} = [];
255 }
256 if(defined $name and $name ne ""){
257 push (@{$data->{$key}},$name);
258 }
259 else{
260 push (@{$data->{$key}},"GreenstoneXMLPlugout");
261 }
262 }
263 if ($currentLevel eq "plugout" and $element eq "option") {
264 my $key = $nameMap->{$currentLevel};
265 if (defined $name and $name ne ""){
266 push (@{$data->{$key}},$name);
267 }
268 if (defined $value and $value ne ""){
269 push (@{$data->{$key}},$value);
270 }
271 }
272
273 #@ use hash of hash of strings: hashexp
274 elsif ($element =~ /$hashexp/) {
275 if (!defined $data->{$element}) {
276 $data->{$element} = {};
277 }
278 if (defined $name and $name =~ /\w/) {
279 if (defined $filter and $filter =~ /\w/) {
280 $data->{$element}->{$name} = $filter;
281
282 }
283 }
284 }
285
286 #@ Handling each classifier/plugin element
287 elsif ($element =~ /$arrayarrayexp/) {
288 # find the gs2 mapping name
289 $currentLevel = $element;
290 my $key = $nameMap->{$element};
291
292 # define an array of array of strings foreach $k (@{$data->{$key}}) {
293 if (!defined $data->{$key}) {
294 $data->{$key} = [];
295 }
296 # Push classifier/plugin name (e.g. AZList) into $data as the first string
297 push (@{$data->{$key}->[$currentIndex]},$name);
298 if (defined $value and $value =~ /\w/) {
299 push (@{$data->{$key}->[$currentIndex]}, $value);
300 print "$value\n";
301 }
302 #print $currentIndex."indexup\n";
303 }
304
305 #@ Handling the option elements in each classifier/plugin element (as the following strings)
306 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
307 # find the gs2 mapping name for classifier and plugin
308 my $key = $nameMap->{$currentLevel};
309
310 if (defined $name and $name =~ /\w/) {
311 push (@{$data->{$key}->[$currentIndex]}, $name);
312 }
313 if (defined $value and $value =~ /\w/) {
314 push (@{$data->{$key}->[$currentIndex]}, $value);
315 }
316
317 }
318
319}
320
321sub EndTag {
322 my ($expat, $element) = @_;
323 my $endTags = q/^(browse|pluginList|displayItemList)$/; #|buildOptionList)$/;
324 if ($element =~ /$endTags/) {
325 $currentIndex = 0;
326 $currentLevel = "";
327 }
328 # $arrayarrayexp contains classifier|plugin
329 elsif($element =~ /$arrayarrayexp/ ){
330 $currentIndex = $currentIndex + 1;
331 }
332}
333
334sub Text {
335 if (defined $currentLocation) {
336 #@ Handling block metadataList(creator, maintainer, public)
337 if($currentLocation =~ /$stringexp/){
338 #print $currentLocation;
339 my $key = $nameMap->{$currentLocation};
340 $data->{$key} = $_;
341 undef $currentLocation;
342 }
343
344 #@ Handling displayItem metadata that are children of displayItemList
345 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
346 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
347 my $lang = $currentAttrRef->{'lang'};
348 my $name = $currentAttrRef->{'name'};
349
350 # this is how data->collectionmeta's language is set in Greenstone 2.
351 # Need to be consistent, since export.pl accesses these values all in the same way
352 if(!defined $lang) {
353 $lang = 'default';
354 } else {
355 $lang = "[l=$lang]";
356 }
357
358 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
359 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
360 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
361 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
362 }
363 undef $currentLocation;
364 }
365
366 #@ Handling searchtype: plain,form; arrayexp
367 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
368 # map 'searchType' into 'searchtype'
369 my $key = $nameMap->{$currentLocation};
370 # split it by ','
371 my ($plain, $form) = split (",", $_);
372
373 if (!defined $data->{$key}) {
374 $data->{$key} = [];
375 }
376 if (defined $plain and $plain =~ /\w/) {
377 push @{ $data->{$key} }, $plain;
378 }
379 if (defined $form and $form =~ /\w/) {
380 push @{ $data->{$key} }, $form;
381 }
382 }
383 }
384}
385
386# This sub is for debugging purposes
387sub Display {
388 # metadataList
389 foreach my $k (keys %{$data}) {
390 print STDERR "*** metadatalist key $k\n";
391 }
392
393 print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
394 print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
395 print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
396 print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
397 print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
398 print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
399 print STDERR "*** search types: \n";
400 print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
401 print STDERR "*** levels: \n";
402 print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
403 print STDERR "*** index subcollections: \n";
404 print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
405 print STDERR "*** indexes: \n";
406 print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
407 print STDERR "*** index options: \n";
408 print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
409 print STDERR "*** languages: \n";
410 print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
411 print STDERR "*** language metadata: \n";
412 print STDERR join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
413
414 print STDERR "*** Plugins: \n";
415 if (defined $data->{'plugin'}) {
416 foreach $a (@{$data->{'plugin'}}) {
417 print join(",",@$a);
418 print "\n";
419 }
420 }
421
422 #print STDERR "*** Build options: \n";
423 #if (defined $data->{'store_metadata_coverage'}) {
424 #foreach $a (@{$data->{'store_metadata_coverage'}}) {
425 # print join(",",@$a,@$_);
426 # print "\n";
427 #}
428 #}
429
430 if (defined $data->{'classify'}) {
431 print STDERR "*** Classifiers: \n";
432 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
433 }
434
435 if (defined $data->{'subcollection'}) {
436 foreach my $key (keys %{$data->{'subcollection'}}) {
437 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
438 }
439 }
440}
441# is this actually used??
442sub Doctype {
443 my ($expat, $name, $sysid, $pubid, $internal) = @_;
444
445 die if ($name !~ /^CollectionConfig$/);
446}
447
448# This Char function overrides the one in XML::Parser::Stream to overcome a
449# problem where $expat->{Text} is treated as the return value, slowing
450# things down significantly in some cases.
451sub Char {
452 if ($]<5.008) {
453 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
454 }
455 $_[0]->{'Text'} .= $_[1];
456 return undef;
457}
458
459
460
461
462#########################################################
463
4641;
Note: See TracBrowser for help on using the repository browser.