source: main/trunk/greenstone2/perllib/collConfigxml.pm@ 23895

Last change on this file since 23895 was 23895, checked in by sjm84, 13 years ago

Modified several Perl files to merge the locations where XML::Parser checked for the current version of perl into one location. Also tidied up several locations where a difference was specified between 5.6 and 5.8+ to do with ProtocolEncoding being used to initialise an XML::Parser. Given the recent "

  • Property svn:keywords set to Author Date Id Revision
File size: 14.2 KB
RevLine 
[15600]1###########################################################################
2#
[20096]3# collConfigxml.pm --
[15600]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[14741]25
[15600]26# reads in configuration files of xml form
27
[20096]28package collConfigxml;
[15600]29use strict;
30no strict 'refs';
31no strict 'subs';
32
[23895]33use XMLParser;
[15600]34
[17895]35# A mapping hash to resolve name discrepancy between gs2 and gs3.
[15600]36my $nameMap = {"key" => "value",
37 "creator" => "creator",
38 "maintainer" => "maintainer",
39 "public" => "public",
[17895]40 "infodb" => "infodbtype",
[15600]41 "defaultIndex" => "defaultindex",
42 "defaultLevel" => "defaultlevel",
43 "name" => "collectionname",
44 "description" => "collectionextra",
45 "smallicon" => "iconcollectionsmall",
46 "icon" => "iconcollection",
47 "level" => "levels",
48 "classifier" => "classify",
49 "indexSubcollection" => "indexsubcollections",
50 "indexLanguage" => "languages",
51 "defaultIndexLanguage" => "defaultlanguage",
52 "index" => "indexes",
53 "plugin" => "plugin",
[17895]54 "plugout" => "plugout",
[15600]55 "indexOption" => "indexoptions",
56 "searchType" => "searchtype",
57 "languageMetadata" => "languagemetadata",
[22456]58 "buildType" => "buildtype",
59 "buildOption" => "buildOption"
[15600]60 };
61# A hash structure which is returned by sub read_cfg_file.
62my $data = {};
63
[22456]64my $repeatedBlock = q/^(browse|pluginList)$/; #|buildOptionList)$/;
[15600]65
66# use those unique attribute values to locate the text within the elements
[15619]67# creator, public, maintainer and within a displayItem.
[15600]68my $currentLocation = "";
[19898]69my $stringexp = q/^(creator|maintainer|public|buildType)$/;
[15619]70my $displayItemNames = q/^(name|description)$/;
71
72# For storing the attributes during the StartTag subroutine, so that
73# we can use it later in Text (or EndTag) subroutines
74my $currentAttrRef = undef;
[15600]75
76my $currentLevel = "";
77
78# Count the elements with same name within the same block
79# ("plugin", "option")
80my $currentIndex = 0;
81my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
[22456]82my $arrayarrayexp= q/^(plugin|classifier)$/; #|buildOption)$/;
[15619]83my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
84my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
[15600]85
86my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
87
[20099]88# Reads in the model collection configuration file, collectionConfig.xml,
89# into a structure which complies with the one used by gs2 (i.e. one read
90# in by &cfgread::read_cfg_file).
91sub read_cfg_file {
92 my ($filename) = @_;
93 $data = {};
94 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
95 return undef;
96 }
97
[23895]98 # Removed ProtocolEncoding (see MetadataXMLPlugin for details)
99
[20099]100 # create XML::Parser object for parsing metadata.xml files
[23895]101 my $parser = new XML::Parser('Style' => 'Stream',
102 'Pkg' => 'collConfigxml',
103 'Handlers' => {'Char' => \&Char,
[20099]104 'Doctype' => \&Doctype
105 });
106 if (!open (COLCFG, $filename)) {
107 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
108 } else {
109
110 $parser->parsefile ($filename);# (COLCFG);
111 close (COLCFG);
112 }
113
[22485]114 #&Display;
[20099]115 return $data;
116}
117
[15600]118sub StartTag {
119# Those marked with #@ will not be executed at the same time when this sub is being called
120# so that if/elsif is used to avoid unnecessary tests
121 my ($expat, $element) = @_;
[15619]122
123 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
[17895]124 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
[15619]125 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
126 $currentAttrRef = \%_;
[15600]127
128 my $name = $_{'name'};
129 my $value = $_{'value'};
130 my $type = $_{'type'};
131
132 # for subcollections
133 my $filter = $_{'filter'};
[20099]134
[20104]135 # was this just a flax thing??
136 my $assigned = $_{'assigned'};
137
[15600]138 #@ Marking repeated block
139 if ($element =~ /$repeatedBlock/) {
140 $currentIndex = 0;
141 }
142
143 #@ handling block metadataList
144 elsif (defined $name and $name =~ /$stringexp/){
145 $currentLocation = $name;
146 }
147 #@ handling default search index/level/indexLanguage and languageMetadata
148 elsif ($element =~ /$defaults/) {
149 if (defined $name and $name =~ /\w/) {
150 $data->{$nameMap->{$element}} = $name;
151 }
152 }
153
[15619]154 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
155 elsif($element eq "displayItemList") {
156 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
157 }
158 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
159 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
160 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
161 $currentLocation = $name;
162 }
163 }
[17895]164
165 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
166 elsif ($element eq "infodb") {
167 $data->{'infodbtype'} = $type;
168 }
[15619]169
[15600]170 #@ Handling indexer: mgpp/mg/lucene; stringexp
171 elsif ($element eq "search") {
172 $data->{'buildtype'} = $type;
173 }
174
175 #@ Handling searchtype: plain,form; arrayexp
176 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
177 #@ Handling searchtype: plain, form
178 #$currentLocation = $name;
179 #}
180
181 #@ Handle index|level|indexSubcollection|indexLanguage
182 elsif ($element =~ /$arrayexp/) {
183 my $key = $nameMap->{$element};
184 if (!defined $data->{$key}) {
185 $data->{$key} = [];
186 }
187
188 push (@{$data->{$key}},$name);
189 }
190
[22456]191 #*****************************************
192 elsif ($element eq "buildOption") {
193 print STDERR "**** BUILD OPTION PAIR $name $value\n";
194 $data->{$name} = $value;
195 }
196
197
[15600]198 #@ indexoptions: accentfold/casefold/stem; arrayexp
199 elsif ($element eq "indexOption") {
200 $currentLevel = "indexOption";
201 }
202 if ($currentLevel eq "indexOption" and $element eq "option") {
203 my $key = $nameMap->{$currentLevel};
204 if (!defined $data->{$key}) {
205 $data->{$key} = [];
206 }
207 push (@{$data->{$key}},$name);
208 }
209 #@ plugout options
210 elsif ($element eq "plugout") {
211 $currentLevel = "plugout";
212 my $key = $nameMap->{$currentLevel};
213 if (!defined $data->{$key}) {
214 $data->{$key} = [];
215 }
216 if(defined $name and $name ne ""){
217 push (@{$data->{$key}},$name);
218 }
219 else{
[17747]220 push (@{$data->{$key}},"GreenstoneXMLPlugout");
[15600]221 }
222 }
223 if ($currentLevel eq "plugout" and $element eq "option") {
224 my $key = $nameMap->{$currentLevel};
225 if (defined $name and $name ne ""){
226 push (@{$data->{$key}},$name);
227 }
228 if (defined $value and $value ne ""){
229 push (@{$data->{$key}},$value);
230 }
231 }
232
233 #@ use hash of hash of strings: hashexp
[15619]234 elsif ($element =~ /$hashexp/) {
235 if (!defined $data->{$element}) {
236 $data->{$element} = {};
[15600]237 }
238 if (defined $name and $name =~ /\w/) {
239 if (defined $filter and $filter =~ /\w/) {
[15619]240 $data->{$element}->{$name} = $filter;
[15600]241
242 }
243 }
244 }
245
246 #@ Handling each classifier/plugin element
247 elsif ($element =~ /$arrayarrayexp/) {
248 # find the gs2 mapping name
249 $currentLevel = $element;
250 my $key = $nameMap->{$element};
251
252 # define an array of array of strings foreach $k (@{$data->{$key}}) {
253 if (!defined $data->{$key}) {
254 $data->{$key} = [];
255 }
256 # Push classifier/plugin name (e.g. AZList) into $data as the first string
257 push (@{$data->{$key}->[$currentIndex]},$name);
[22456]258 if (defined $value and $value =~ /\w/) {
259 push (@{$data->{$key}->[$currentIndex]}, $value);
260 print "$value\n";
261 }
[15600]262 #print $currentIndex."indexup\n";
263 }
264
265 #@ Handling the option elements in each classifier/plugin element (as the following strings)
266 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
267 # find the gs2 mapping name for classifier and plugin
268 my $key = $nameMap->{$currentLevel};
269
270 if (defined $name and $name =~ /\w/) {
271 push (@{$data->{$key}->[$currentIndex]}, $name);
272 }
273 if (defined $value and $value =~ /\w/) {
274 push (@{$data->{$key}->[$currentIndex]}, $value);
275 }
276
277 }
[20102]278
[15600]279}
280
281sub EndTag {
282 my ($expat, $element) = @_;
[22456]283 my $endTags = q/^(browse|pluginList|displayItemList)$/; #|buildOptionList)$/;
[15600]284 if ($element =~ /$endTags/) {
285 $currentIndex = 0;
286 $currentLevel = "";
287 }
288 # $arrayarrayexp contains classifier|plugin
[20102]289 elsif($element =~ /$arrayarrayexp/ ){
[15600]290 $currentIndex = $currentIndex + 1;
291 }
292}
293
294sub Text {
[15619]295 if (defined $currentLocation) {
296 #@ Handling block metadataList(creator, maintainer, public)
297 if($currentLocation =~ /$stringexp/){
298 #print $currentLocation;
299 my $key = $nameMap->{$currentLocation};
300 $data->{$key} = $_;
301 undef $currentLocation;
302 }
[15600]303
[15619]304 #@ Handling displayItem metadata that are children of displayItemList
305 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
306 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
307 my $lang = $currentAttrRef->{'lang'};
308 my $name = $currentAttrRef->{'name'};
309
310 # this is how data->collectionmeta's language is set in Greenstone 2.
311 # Need to be consistent, since export.pl accesses these values all in the same way
312 if(!defined $lang) {
313 $lang = 'default';
314 } else {
315 $lang = "[l=$lang]";
316 }
317
318 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
319 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
320 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
321 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
322 }
323 undef $currentLocation;
[15600]324 }
[15619]325
326 #@ Handling searchtype: plain,form; arrayexp
327 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
328 # map 'searchType' into 'searchtype'
329 my $key = $nameMap->{$currentLocation};
330 # split it by ','
331 my ($plain, $form) = split (",", $_);
332
333 if (!defined $data->{$key}) {
334 $data->{$key} = [];
335 }
336 if (defined $plain and $plain =~ /\w/) {
337 push @{ $data->{$key} }, $plain;
338 }
339 if (defined $form and $form =~ /\w/) {
340 push @{ $data->{$key} }, $form;
341 }
[15600]342 }
[15619]343 }
[15600]344}
[15619]345
[15600]346# This sub is for debugging purposes
347sub Display {
348 # metadataList
[15619]349 foreach my $k (keys %{$data}) {
350 print STDERR "*** metadatalist key $k\n";
351 }
352
[22456]353 print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
354 print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
355 print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
356 print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
357 print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
358 print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
359 print STDERR "*** search types: \n";
360 print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
361 print STDERR "*** levels: \n";
362 print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
363 print STDERR "*** index subcollections: \n";
364 print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
365 print STDERR "*** indexes: \n";
366 print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
367 print STDERR "*** index options: \n";
368 print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
369 print STDERR "*** languages: \n";
370 print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
371 print STDERR "*** language metadata: \n";
372 print STDERR join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
[15600]373
[22456]374 print STDERR "*** Plugins: \n";
[15600]375 if (defined $data->{'plugin'}) {
376 foreach $a (@{$data->{'plugin'}}) {
377 print join(",",@$a);
378 print "\n";
379 }
380 }
[22456]381
382 #print STDERR "*** Build options: \n";
383 #if (defined $data->{'store_metadata_coverage'}) {
384 #foreach $a (@{$data->{'store_metadata_coverage'}}) {
385 # print join(",",@$a,@$_);
386 # print "\n";
387 #}
388 #}
389
[15600]390 if (defined $data->{'classify'}) {
[22456]391 print STDERR "*** Classifiers: \n";
[15600]392 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
393 }
394
395 if (defined $data->{'subcollection'}) {
396 foreach my $key (keys %{$data->{'subcollection'}}) {
397 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
398 }
399 }
400}
[20104]401# is this actually used??
[15600]402sub Doctype {
403 my ($expat, $name, $sysid, $pubid, $internal) = @_;
404
[20104]405 die if ($name !~ /^CollectionConfig$/);
[15600]406}
407
408# This Char function overrides the one in XML::Parser::Stream to overcome a
409# problem where $expat->{Text} is treated as the return value, slowing
410# things down significantly in some cases.
411sub Char {
412 if ($]<5.008) {
413 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
414 }
415 $_[0]->{'Text'} .= $_[1];
416 return undef;
417}
[15619]418
[15600]419
420
421
422#########################################################
423
4241;
Note: See TracBrowser for help on using the repository browser.