source: main/trunk/greenstone2/perllib/collConfigxml.pm@ 21822

Last change on this file since 21822 was 21822, checked in by ak19, 12 years ago

Dr Bainbridge has fixed several perl files that depended on perl 5.8 to work and used to fail with Perl 5.10.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.0 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33# Wrapper that ensures the right version of XML::Parser is loaded given
34# the version of Perl being used. Need to distinguish between Perl 5.6 and
35# Perl 5.8
36sub BEGIN {
37 my $perl_dir;
38
39 # Note: $] encodes the version number of perl
40 if ($]>=5.010) {
41 $perl_dir = "perl-5.10";
42 }
43 elsif ($]>5.008) {
44 # perl 5.8.1 or above
45 $perl_dir = "perl-5.8";
46 }
47 elsif ($]<5.008) {
48 # assume perl 5.6
49 $perl_dir = "perl-5.6";
50 }
51 else {
52 print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
53 print STDERR " Please upgrade to a newer version of Perl.\n";
54 $perl_dir = "perl-5.8";
55 }
56
57 if ($ENV{'GSDLOS'} !~ /^windows$/i) {
58 # Use push to put this on the end, so an existing XML::Parser will be used by default
59 push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
60 }
61}
62
63use XML::Parser;
64
65# A mapping hash to resolve name discrepancy between gs2 and gs3.
66my $nameMap = {"key" => "value",
67 "creator" => "creator",
68 "maintainer" => "maintainer",
69 "public" => "public",
70 "infodb" => "infodbtype",
71 "defaultIndex" => "defaultindex",
72 "defaultLevel" => "defaultlevel",
73 "name" => "collectionname",
74 "description" => "collectionextra",
75 "smallicon" => "iconcollectionsmall",
76 "icon" => "iconcollection",
77 "level" => "levels",
78 "classifier" => "classify",
79 "indexSubcollection" => "indexsubcollections",
80 "indexLanguage" => "languages",
81 "defaultIndexLanguage" => "defaultlanguage",
82 "index" => "indexes",
83 "plugin" => "plugin",
84 "plugout" => "plugout",
85 "indexOption" => "indexoptions",
86 "searchType" => "searchtype",
87 "languageMetadata" => "languagemetadata",
88 "buildType" => "buildtype"
89 };
90# A hash structure which is returned by sub read_cfg_file.
91my $data = {};
92
93my $repeatedBlock = q/^(browse|pluginList)$/;
94
95# use those unique attribute values to locate the text within the elements
96# creator, public, maintainer and within a displayItem.
97my $currentLocation = "";
98my $stringexp = q/^(creator|maintainer|public|buildType)$/;
99my $displayItemNames = q/^(name|description)$/;
100
101# For storing the attributes during the StartTag subroutine, so that
102# we can use it later in Text (or EndTag) subroutines
103my $currentAttrRef = undef;
104
105my $currentLevel = "";
106
107# Count the elements with same name within the same block
108# ("plugin", "option")
109my $currentIndex = 0;
110my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
111my $arrayarrayexp= q/^(plugin|classifier)$/;
112my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
113my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
114
115my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
116
117# Reads in the model collection configuration file, collectionConfig.xml,
118# into a structure which complies with the one used by gs2 (i.e. one read
119# in by &cfgread::read_cfg_file).
120sub read_cfg_file {
121 my ($filename) = @_;
122 $data = {};
123 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
124 return undef;
125 }
126
127 # create XML::Parser object for parsing metadata.xml files
128 my $parser;
129 if ($]<5.008) {
130 # Perl 5.6
131 $parser = new XML::Parser('Style' => 'Stream',
132 'Handlers' => {'Char' => \&Char,
133 'Doctype' => \&Doctype
134 });
135 }
136 else {
137 # Perl 5.8
138 $parser = new XML::Parser('Style' => 'Stream',
139 'ProtocolEncoding' => 'ISO-8859-1',
140 'Handlers' => {'Char' => \&Char,
141 'Doctype' => \&Doctype
142 });
143 }
144
145 if (!open (COLCFG, $filename)) {
146 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
147 } else {
148
149 $parser->parsefile ($filename);# (COLCFG);
150 close (COLCFG);
151 }
152
153 #&Display;
154 return $data;
155}
156
157sub StartTag {
158# Those marked with #@ will not be executed at the same time when this sub is being called
159# so that if/elsif is used to avoid unnecessary tests
160 my ($expat, $element) = @_;
161
162 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
163 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
164 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
165 $currentAttrRef = \%_;
166
167 my $name = $_{'name'};
168 my $value = $_{'value'};
169 my $type = $_{'type'};
170
171 # for subcollections
172 my $filter = $_{'filter'};
173
174 # was this just a flax thing??
175 my $assigned = $_{'assigned'};
176
177 #@ Marking repeated block
178 if ($element =~ /$repeatedBlock/) {
179 $currentIndex = 0;
180 }
181
182 #@ handling block metadataList
183 elsif (defined $name and $name =~ /$stringexp/){
184 $currentLocation = $name;
185 }
186 #@ handling default search index/level/indexLanguage and languageMetadata
187 elsif ($element =~ /$defaults/) {
188 if (defined $name and $name =~ /\w/) {
189 $data->{$nameMap->{$element}} = $name;
190 }
191 }
192
193 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
194 elsif($element eq "displayItemList") {
195 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
196 }
197 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
198 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
199 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
200 $currentLocation = $name;
201 }
202 }
203
204 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
205 elsif ($element eq "infodb") {
206 $data->{'infodbtype'} = $type;
207 }
208
209 #@ Handling indexer: mgpp/mg/lucene; stringexp
210 elsif ($element eq "search") {
211 $data->{'buildtype'} = $type;
212 }
213
214 #@ Handling searchtype: plain,form; arrayexp
215 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
216 #@ Handling searchtype: plain, form
217 #$currentLocation = $name;
218 #}
219
220 #@ Handle index|level|indexSubcollection|indexLanguage
221 elsif ($element =~ /$arrayexp/) {
222 my $key = $nameMap->{$element};
223 if (!defined $data->{$key}) {
224 $data->{$key} = [];
225 }
226
227 push (@{$data->{$key}},$name);
228 }
229
230 #@ indexoptions: accentfold/casefold/stem; arrayexp
231 elsif ($element eq "indexOption") {
232 $currentLevel = "indexOption";
233 }
234 if ($currentLevel eq "indexOption" and $element eq "option") {
235 my $key = $nameMap->{$currentLevel};
236 if (!defined $data->{$key}) {
237 $data->{$key} = [];
238 }
239 push (@{$data->{$key}},$name);
240 }
241 #@ plugout options
242 elsif ($element eq "plugout") {
243 $currentLevel = "plugout";
244 my $key = $nameMap->{$currentLevel};
245 if (!defined $data->{$key}) {
246 $data->{$key} = [];
247 }
248 if(defined $name and $name ne ""){
249 push (@{$data->{$key}},$name);
250 }
251 else{
252 push (@{$data->{$key}},"GreenstoneXMLPlugout");
253 }
254 }
255 if ($currentLevel eq "plugout" and $element eq "option") {
256 my $key = $nameMap->{$currentLevel};
257 if (defined $name and $name ne ""){
258 push (@{$data->{$key}},$name);
259 }
260 if (defined $value and $value ne ""){
261 push (@{$data->{$key}},$value);
262 }
263 }
264
265 #@ use hash of hash of strings: hashexp
266 elsif ($element =~ /$hashexp/) {
267 if (!defined $data->{$element}) {
268 $data->{$element} = {};
269 }
270 if (defined $name and $name =~ /\w/) {
271 if (defined $filter and $filter =~ /\w/) {
272 $data->{$element}->{$name} = $filter;
273
274 }
275 }
276 }
277
278 #@ Handling each classifier/plugin element
279 elsif ($element =~ /$arrayarrayexp/) {
280 # find the gs2 mapping name
281 $currentLevel = $element;
282 my $key = $nameMap->{$element};
283
284 # define an array of array of strings foreach $k (@{$data->{$key}}) {
285 if (!defined $data->{$key}) {
286 $data->{$key} = [];
287 }
288 # Push classifier/plugin name (e.g. AZList) into $data as the first string
289 push (@{$data->{$key}->[$currentIndex]},$name);
290 #print $currentIndex."indexup\n";
291 }
292
293 #@ Handling the option elements in each classifier/plugin element (as the following strings)
294 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
295 # find the gs2 mapping name for classifier and plugin
296 my $key = $nameMap->{$currentLevel};
297
298 if (defined $name and $name =~ /\w/) {
299 push (@{$data->{$key}->[$currentIndex]}, $name);
300 }
301 if (defined $value and $value =~ /\w/) {
302 push (@{$data->{$key}->[$currentIndex]}, $value);
303 }
304
305 }
306
307}
308
309sub EndTag {
310 my ($expat, $element) = @_;
311 my $endTags = q/^(browse|pluginList|displayItemList)$/;
312 if ($element =~ /$endTags/) {
313 $currentIndex = 0;
314 $currentLevel = "";
315 }
316 # $arrayarrayexp contains classifier|plugin
317 elsif($element =~ /$arrayarrayexp/ ){
318 $currentIndex = $currentIndex + 1;
319 }
320}
321
322sub Text {
323 if (defined $currentLocation) {
324 #@ Handling block metadataList(creator, maintainer, public)
325 if($currentLocation =~ /$stringexp/){
326 #print $currentLocation;
327 my $key = $nameMap->{$currentLocation};
328 $data->{$key} = $_;
329 undef $currentLocation;
330 }
331
332 #@ Handling displayItem metadata that are children of displayItemList
333 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
334 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
335 my $lang = $currentAttrRef->{'lang'};
336 my $name = $currentAttrRef->{'name'};
337
338 # this is how data->collectionmeta's language is set in Greenstone 2.
339 # Need to be consistent, since export.pl accesses these values all in the same way
340 if(!defined $lang) {
341 $lang = 'default';
342 } else {
343 $lang = "[l=$lang]";
344 }
345
346 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
347 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
348 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
349 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
350 }
351 undef $currentLocation;
352 }
353
354 #@ Handling searchtype: plain,form; arrayexp
355 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
356 # map 'searchType' into 'searchtype'
357 my $key = $nameMap->{$currentLocation};
358 # split it by ','
359 my ($plain, $form) = split (",", $_);
360
361 if (!defined $data->{$key}) {
362 $data->{$key} = [];
363 }
364 if (defined $plain and $plain =~ /\w/) {
365 push @{ $data->{$key} }, $plain;
366 }
367 if (defined $form and $form =~ /\w/) {
368 push @{ $data->{$key} }, $form;
369 }
370 }
371 }
372}
373
374# This sub is for debugging purposes
375sub Display {
376 # metadataList
377 foreach my $k (keys %{$data}) {
378 print STDERR "*** metadatalist key $k\n";
379 }
380
381 print $data->{'creator'}."\n" if (defined $data->{'creator'});
382 print $data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
383 print $data->{"public"}."\n" if (defined $data->{"public"});
384 print $data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
385 print $data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
386 print $data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
387 print join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
388 print join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
389 print join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
390 print join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
391 print join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
392 print join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
393 print join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
394
395 if (defined $data->{'plugin'}) {
396 foreach $a (@{$data->{'plugin'}}) {
397 print join(",",@$a);
398 print "\n";
399 }
400 }
401 if (defined $data->{'classify'}) {
402 print "Classifiers: \n";
403 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
404 }
405
406 if (defined $data->{'subcollection'}) {
407 foreach my $key (keys %{$data->{'subcollection'}}) {
408 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
409 }
410 }
411}
412# is this actually used??
413sub Doctype {
414 my ($expat, $name, $sysid, $pubid, $internal) = @_;
415
416 die if ($name !~ /^CollectionConfig$/);
417}
418
419# This Char function overrides the one in XML::Parser::Stream to overcome a
420# problem where $expat->{Text} is treated as the return value, slowing
421# things down significantly in some cases.
422sub Char {
423 if ($]<5.008) {
424 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
425 }
426 $_[0]->{'Text'} .= $_[1];
427 return undef;
428}
429
430
431
432
433#########################################################
434
4351;
Note: See TracBrowser for help on using the repository browser.