source: main/trunk/greenstone2/perllib/collConfigxml.pm@ 20999

Last change on this file since 20999 was 20104, checked in by kjdon, 15 years ago

added back the assigned variable, modified DocType

  • Property svn:keywords set to Author Date Id Revision
File size: 14.0 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33# Wrapper that ensures the right version of XML::Parser is loaded given
34# the version of Perl being used. Need to distinguish between Perl 5.6 and
35# Perl 5.8
36sub BEGIN {
37 my $perl_dir;
38
39 # Note: $] encodes the version number of perl
40 if ($]>5.008) {
41 # perl 5.8.1 or above
42 $perl_dir = "perl-5.8";
43 }
44 elsif ($]<5.008) {
45 # assume perl 5.6
46 $perl_dir = "perl-5.6";
47 }
48 else {
49 print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
50 print STDERR " Please upgrade to a newer version of Perl.\n";
51 $perl_dir = "perl-5.8";
52 }
53
54 if ($ENV{'GSDLOS'} !~ /^windows$/i) {
55 # Use push to put this on the end, so an existing XML::Parser will be used by default
56 push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
57 }
58}
59
60use XML::Parser;
61
62# A mapping hash to resolve name discrepancy between gs2 and gs3.
63my $nameMap = {"key" => "value",
64 "creator" => "creator",
65 "maintainer" => "maintainer",
66 "public" => "public",
67 "infodb" => "infodbtype",
68 "defaultIndex" => "defaultindex",
69 "defaultLevel" => "defaultlevel",
70 "name" => "collectionname",
71 "description" => "collectionextra",
72 "smallicon" => "iconcollectionsmall",
73 "icon" => "iconcollection",
74 "level" => "levels",
75 "classifier" => "classify",
76 "indexSubcollection" => "indexsubcollections",
77 "indexLanguage" => "languages",
78 "defaultIndexLanguage" => "defaultlanguage",
79 "index" => "indexes",
80 "plugin" => "plugin",
81 "plugout" => "plugout",
82 "indexOption" => "indexoptions",
83 "searchType" => "searchtype",
84 "languageMetadata" => "languagemetadata",
85 "buildType" => "buildtype"
86 };
87# A hash structure which is returned by sub read_cfg_file.
88my $data = {};
89
90my $repeatedBlock = q/^(browse|pluginList)$/;
91
92# use those unique attribute values to locate the text within the elements
93# creator, public, maintainer and within a displayItem.
94my $currentLocation = "";
95my $stringexp = q/^(creator|maintainer|public|buildType)$/;
96my $displayItemNames = q/^(name|description)$/;
97
98# For storing the attributes during the StartTag subroutine, so that
99# we can use it later in Text (or EndTag) subroutines
100my $currentAttrRef = undef;
101
102my $currentLevel = "";
103
104# Count the elements with same name within the same block
105# ("plugin", "option")
106my $currentIndex = 0;
107my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
108my $arrayarrayexp= q/^(plugin|classifier)$/;
109my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
110my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
111
112my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
113
114# Reads in the model collection configuration file, collectionConfig.xml,
115# into a structure which complies with the one used by gs2 (i.e. one read
116# in by &cfgread::read_cfg_file).
117sub read_cfg_file {
118 my ($filename) = @_;
119 $data = {};
120 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
121 return undef;
122 }
123
124 # create XML::Parser object for parsing metadata.xml files
125 my $parser;
126 if ($]<5.008) {
127 # Perl 5.6
128 $parser = new XML::Parser('Style' => 'Stream',
129 'Handlers' => {'Char' => \&Char,
130 'Doctype' => \&Doctype
131 });
132 }
133 else {
134 # Perl 5.8
135 $parser = new XML::Parser('Style' => 'Stream',
136 'ProtocolEncoding' => 'ISO-8859-1',
137 'Handlers' => {'Char' => \&Char,
138 'Doctype' => \&Doctype
139 });
140 }
141
142 if (!open (COLCFG, $filename)) {
143 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
144 } else {
145
146 $parser->parsefile ($filename);# (COLCFG);
147 close (COLCFG);
148 }
149
150 #&Display;
151 return $data;
152}
153
154sub StartTag {
155# Those marked with #@ will not be executed at the same time when this sub is being called
156# so that if/elsif is used to avoid unnecessary tests
157 my ($expat, $element) = @_;
158
159 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
160 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
161 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
162 $currentAttrRef = \%_;
163
164 my $name = $_{'name'};
165 my $value = $_{'value'};
166 my $type = $_{'type'};
167
168 # for subcollections
169 my $filter = $_{'filter'};
170
171 # was this just a flax thing??
172 my $assigned = $_{'assigned'};
173
174 #@ Marking repeated block
175 if ($element =~ /$repeatedBlock/) {
176 $currentIndex = 0;
177 }
178
179 #@ handling block metadataList
180 elsif (defined $name and $name =~ /$stringexp/){
181 $currentLocation = $name;
182 }
183 #@ handling default search index/level/indexLanguage and languageMetadata
184 elsif ($element =~ /$defaults/) {
185 if (defined $name and $name =~ /\w/) {
186 $data->{$nameMap->{$element}} = $name;
187 }
188 }
189
190 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
191 elsif($element eq "displayItemList") {
192 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
193 }
194 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
195 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
196 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
197 $currentLocation = $name;
198 }
199 }
200
201 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
202 elsif ($element eq "infodb") {
203 $data->{'infodbtype'} = $type;
204 }
205
206 #@ Handling indexer: mgpp/mg/lucene; stringexp
207 elsif ($element eq "search") {
208 $data->{'buildtype'} = $type;
209 }
210
211 #@ Handling searchtype: plain,form; arrayexp
212 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
213 #@ Handling searchtype: plain, form
214 #$currentLocation = $name;
215 #}
216
217 #@ Handle index|level|indexSubcollection|indexLanguage
218 elsif ($element =~ /$arrayexp/) {
219 my $key = $nameMap->{$element};
220 if (!defined $data->{$key}) {
221 $data->{$key} = [];
222 }
223
224 push (@{$data->{$key}},$name);
225 }
226
227 #@ indexoptions: accentfold/casefold/stem; arrayexp
228 elsif ($element eq "indexOption") {
229 $currentLevel = "indexOption";
230 }
231 if ($currentLevel eq "indexOption" and $element eq "option") {
232 my $key = $nameMap->{$currentLevel};
233 if (!defined $data->{$key}) {
234 $data->{$key} = [];
235 }
236 push (@{$data->{$key}},$name);
237 }
238 #@ plugout options
239 elsif ($element eq "plugout") {
240 $currentLevel = "plugout";
241 my $key = $nameMap->{$currentLevel};
242 if (!defined $data->{$key}) {
243 $data->{$key} = [];
244 }
245 if(defined $name and $name ne ""){
246 push (@{$data->{$key}},$name);
247 }
248 else{
249 push (@{$data->{$key}},"GreenstoneXMLPlugout");
250 }
251 }
252 if ($currentLevel eq "plugout" and $element eq "option") {
253 my $key = $nameMap->{$currentLevel};
254 if (defined $name and $name ne ""){
255 push (@{$data->{$key}},$name);
256 }
257 if (defined $value and $value ne ""){
258 push (@{$data->{$key}},$value);
259 }
260 }
261
262 #@ use hash of hash of strings: hashexp
263 elsif ($element =~ /$hashexp/) {
264 if (!defined $data->{$element}) {
265 $data->{$element} = {};
266 }
267 if (defined $name and $name =~ /\w/) {
268 if (defined $filter and $filter =~ /\w/) {
269 $data->{$element}->{$name} = $filter;
270
271 }
272 }
273 }
274
275 #@ Handling each classifier/plugin element
276 elsif ($element =~ /$arrayarrayexp/) {
277 # find the gs2 mapping name
278 $currentLevel = $element;
279 my $key = $nameMap->{$element};
280
281 # define an array of array of strings foreach $k (@{$data->{$key}}) {
282 if (!defined $data->{$key}) {
283 $data->{$key} = [];
284 }
285 # Push classifier/plugin name (e.g. AZList) into $data as the first string
286 push (@{$data->{$key}->[$currentIndex]},$name);
287 #print $currentIndex."indexup\n";
288 }
289
290 #@ Handling the option elements in each classifier/plugin element (as the following strings)
291 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
292 # find the gs2 mapping name for classifier and plugin
293 my $key = $nameMap->{$currentLevel};
294
295 if (defined $name and $name =~ /\w/) {
296 push (@{$data->{$key}->[$currentIndex]}, $name);
297 }
298 if (defined $value and $value =~ /\w/) {
299 push (@{$data->{$key}->[$currentIndex]}, $value);
300 }
301
302 }
303
304}
305
306sub EndTag {
307 my ($expat, $element) = @_;
308 my $endTags = q/^(browse|pluginList|displayItemList)$/;
309 if ($element =~ /$endTags/) {
310 $currentIndex = 0;
311 $currentLevel = "";
312 }
313 # $arrayarrayexp contains classifier|plugin
314 elsif($element =~ /$arrayarrayexp/ ){
315 $currentIndex = $currentIndex + 1;
316 }
317}
318
319sub Text {
320 if (defined $currentLocation) {
321 #@ Handling block metadataList(creator, maintainer, public)
322 if($currentLocation =~ /$stringexp/){
323 #print $currentLocation;
324 my $key = $nameMap->{$currentLocation};
325 $data->{$key} = $_;
326 undef $currentLocation;
327 }
328
329 #@ Handling displayItem metadata that are children of displayItemList
330 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
331 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
332 my $lang = $currentAttrRef->{'lang'};
333 my $name = $currentAttrRef->{'name'};
334
335 # this is how data->collectionmeta's language is set in Greenstone 2.
336 # Need to be consistent, since export.pl accesses these values all in the same way
337 if(!defined $lang) {
338 $lang = 'default';
339 } else {
340 $lang = "[l=$lang]";
341 }
342
343 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
344 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
345 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
346 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
347 }
348 undef $currentLocation;
349 }
350
351 #@ Handling searchtype: plain,form; arrayexp
352 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
353 # map 'searchType' into 'searchtype'
354 my $key = $nameMap->{$currentLocation};
355 # split it by ','
356 my ($plain, $form) = split (",", $_);
357
358 if (!defined $data->{$key}) {
359 $data->{$key} = [];
360 }
361 if (defined $plain and $plain =~ /\w/) {
362 push @{ $data->{$key} }, $plain;
363 }
364 if (defined $form and $form =~ /\w/) {
365 push @{ $data->{$key} }, $form;
366 }
367 }
368 }
369}
370
371# This sub is for debugging purposes
372sub Display {
373 # metadataList
374 foreach my $k (keys %{$data}) {
375 print STDERR "*** metadatalist key $k\n";
376 }
377
378 print $data->{'creator'}."\n" if (defined $data->{'creator'});
379 print $data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
380 print $data->{"public"}."\n" if (defined $data->{"public"});
381 print $data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
382 print $data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
383 print $data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
384 print join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
385 print join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
386 print join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
387 print join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
388 print join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
389 print join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
390 print join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
391
392 if (defined $data->{'plugin'}) {
393 foreach $a (@{$data->{'plugin'}}) {
394 print join(",",@$a);
395 print "\n";
396 }
397 }
398 if (defined $data->{'classify'}) {
399 print "Classifiers: \n";
400 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
401 }
402
403 if (defined $data->{'subcollection'}) {
404 foreach my $key (keys %{$data->{'subcollection'}}) {
405 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
406 }
407 }
408}
409# is this actually used??
410sub Doctype {
411 my ($expat, $name, $sysid, $pubid, $internal) = @_;
412
413 die if ($name !~ /^CollectionConfig$/);
414}
415
416# This Char function overrides the one in XML::Parser::Stream to overcome a
417# problem where $expat->{Text} is treated as the return value, slowing
418# things down significantly in some cases.
419sub Char {
420 if ($]<5.008) {
421 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
422 }
423 $_[0]->{'Text'} .= $_[1];
424 return undef;
425}
426
427
428
429
430#########################################################
431
4321;
Note: See TracBrowser for help on using the repository browser.