source: gs2-extensions/parallel-building/trunk/src/perllib/collConfigxml.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 14.8 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use XMLParser;
34
35# A mapping hash to resolve name discrepancy between gs2 and gs3.
36my $nameMap = {"key" => "value",
37 "creator" => "creator",
38 "maintainer" => "maintainer",
39 "public" => "public",
40 "infodb" => "infodbtype",
41 "defaultIndex" => "defaultindex",
42 "defaultLevel" => "defaultlevel",
43 "name" => "collectionname",
44 "description" => "collectionextra",
45 "smallicon" => "iconcollectionsmall",
46 "icon" => "iconcollection",
47 "level" => "levels",
48 "classifier" => "classify",
49 "indexSubcollection" => "indexsubcollections",
50 "indexLanguage" => "languages",
51 "defaultIndexLanguage" => "defaultlanguage",
52 "index" => "indexes",
53 "plugin" => "plugin",
54 "plugout" => "plugout",
55 "indexOption" => "indexoptions",
56 "searchType" => "searchtype",
57 "languageMetadata" => "languagemetadata",
58 "buildType" => "buildtype",
59 "orthogonalBuildTypes" => "orthogonalbuildtypes",
60 "buildOption" => "buildOption"
61 };
62# A hash structure which is returned by sub read_cfg_file.
63my $data = {};
64
65my $repeatedBlock = q/^(browse|pluginList)$/; #|buildOptionList)$/;
66
67# use those unique attribute values to locate the text within the elements
68# creator, public, maintainer and within a displayItem.
69my $currentLocation = "";
70my $stringexp = q/^(creator|maintainer|public|buildType)$/;
71my $displayItemNames = q/^(name|description)$/;
72
73# For storing the attributes during the StartTag subroutine, so that
74# we can use it later in Text (or EndTag) subroutines
75my $currentAttrRef = undef;
76
77my $currentLevel = "";
78
79# Count the elements with same name within the same block
80# ("plugin", "option")
81my $currentIndex = 0;
82my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage|orthogonalBuildTypes)$/;
83my $arrayarrayexp= q/^(plugin|classifier)$/; #|buildOption)$/;
84my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
85my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
86
87my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
88
89# Reads in the model collection configuration file, collectionConfig.xml,
90# into a structure which complies with the one used by gs2 (i.e. one read
91# in by &cfgread::read_cfg_file).
92sub read_cfg_file {
93 my ($filename) = @_;
94 $data = {};
95 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
96 return undef;
97 }
98
99 # Removed ProtocolEncoding (see MetadataXMLPlugin for details)
100
101 # create XML::Parser object for parsing metadata.xml files
102 my $parser = new XML::Parser('Style' => 'Stream',
103 'Pkg' => 'collConfigxml',
104 'Handlers' => {'Char' => \&Char,
105 'Doctype' => \&Doctype
106 });
107 if (!open (COLCFG, $filename)) {
108 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
109 } else {
110
111 $parser->parsefile ($filename);# (COLCFG);
112 close (COLCFG);
113 }
114
115 #&Display;
116 return $data;
117}
118
119sub StartTag {
120# Those marked with #@ will not be executed at the same time when this sub is being called
121# so that if/elsif is used to avoid unnecessary tests
122 my ($expat, $element) = @_;
123
124 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
125 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
126 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
127 $currentAttrRef = \%_;
128
129 my $name = $_{'name'};
130 my $value = $_{'value'};
131 my $type = $_{'type'};
132 my $orthogonal = $_{'orthogonal'};
133
134 # for subcollections
135 my $filter = $_{'filter'};
136
137 # was this just a flax thing??
138 my $assigned = $_{'assigned'};
139
140 #@ Marking repeated block
141 if ($element =~ /$repeatedBlock/) {
142 $currentIndex = 0;
143 }
144
145 #@ handling block metadataList
146 elsif (defined $name and $name =~ /$stringexp/){
147 $currentLocation = $name;
148 }
149 #@ handling default search index/level/indexLanguage and languageMetadata
150 elsif ($element =~ /$defaults/) {
151 if (defined $name and $name =~ /\w/) {
152 $data->{$nameMap->{$element}} = $name;
153 }
154 }
155
156 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
157 elsif($element eq "displayItemList") {
158 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
159 }
160 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
161 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
162 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
163 $currentLocation = $name;
164 }
165 }
166
167 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
168 elsif ($element eq "infodb") {
169 $data->{'infodbtype'} = $type;
170 }
171
172 #@ Handling indexer: mgpp/mg/lucene; stringexp
173 #@ Handling orthogonal indexers: audioDB; arrayexp
174 elsif ($element eq "search") {
175 if ((defined $orthogonal) && ($orthogonal =~ m/^(true|on|1)$/i)) {
176 push(@{$data->{'orthogonalbuildtypes'}},$type);
177 }
178 else {
179 $data->{'buildtype'} = $type;
180 }
181 }
182
183 elsif ($element eq "store_metadata_coverage")
184 {
185## print STDERR "*&*&*&*&*& HERE &*&*&*&*&*&*";
186 $data->{'store_metadata_coverage'} = $value;
187 }
188
189 #@ Handling searchtype: plain,form; arrayexp
190 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
191 #@ Handling searchtype: plain, form
192 #$currentLocation = $name;
193 #}
194
195 #@ Handle index|level|indexSubcollection|indexLanguage
196 elsif ($element =~ /$arrayexp/) {
197 my $key = $nameMap->{$element};
198 if (!defined $data->{$key}) {
199 $data->{$key} = [];
200 }
201
202 push (@{$data->{$key}},$name);
203 }
204
205 #*****************************************
206 elsif ($element eq "buildOption") {
207 print STDERR "**** BUILD OPTION PAIR $name $value\n";
208 $data->{$name} = $value;
209 }
210
211
212 #@ indexoptions: accentfold/casefold/stem; arrayexp
213 elsif ($element eq "indexOption") {
214 $currentLevel = "indexOption";
215 }
216 if ($currentLevel eq "indexOption" and $element eq "option") {
217 my $key = $nameMap->{$currentLevel};
218 if (!defined $data->{$key}) {
219 $data->{$key} = [];
220 }
221 push (@{$data->{$key}},$name);
222 }
223 #@ plugout options
224 elsif ($element eq "plugout") {
225 $currentLevel = "plugout";
226 my $key = $nameMap->{$currentLevel};
227 if (!defined $data->{$key}) {
228 $data->{$key} = [];
229 }
230 if(defined $name and $name ne ""){
231 push (@{$data->{$key}},$name);
232 }
233 else{
234 push (@{$data->{$key}},"GreenstoneXMLPlugout");
235 }
236 }
237 if ($currentLevel eq "plugout" and $element eq "option") {
238 my $key = $nameMap->{$currentLevel};
239 if (defined $name and $name ne ""){
240 push (@{$data->{$key}},$name);
241 }
242 if (defined $value and $value ne ""){
243 push (@{$data->{$key}},$value);
244 }
245 }
246
247 #@ use hash of hash of strings: hashexp
248 elsif ($element =~ /$hashexp/) {
249 if (!defined $data->{$element}) {
250 $data->{$element} = {};
251 }
252 if (defined $name and $name =~ /\w/) {
253 if (defined $filter and $filter =~ /\w/) {
254 $data->{$element}->{$name} = $filter;
255
256 }
257 }
258 }
259
260 #@ Handling each classifier/plugin element
261 elsif ($element =~ /$arrayarrayexp/) {
262 # find the gs2 mapping name
263 $currentLevel = $element;
264 my $key = $nameMap->{$element};
265
266 # define an array of array of strings foreach $k (@{$data->{$key}}) {
267 if (!defined $data->{$key}) {
268 $data->{$key} = [];
269 }
270 # Push classifier/plugin name (e.g. AZList) into $data as the first string
271 push (@{$data->{$key}->[$currentIndex]},$name);
272 if (defined $value and $value =~ /\w/) {
273 push (@{$data->{$key}->[$currentIndex]}, $value);
274 print "$value\n";
275 }
276 #print $currentIndex."indexup\n";
277 }
278
279 #@ Handling the option elements in each classifier/plugin element (as the following strings)
280 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
281 # find the gs2 mapping name for classifier and plugin
282 my $key = $nameMap->{$currentLevel};
283
284 if (defined $name and $name =~ /\w/) {
285 push (@{$data->{$key}->[$currentIndex]}, $name);
286 }
287 if (defined $value and $value =~ /\w/) {
288 push (@{$data->{$key}->[$currentIndex]}, $value);
289 }
290
291 }
292
293}
294
295sub EndTag {
296 my ($expat, $element) = @_;
297 my $endTags = q/^(browse|pluginList|displayItemList)$/; #|buildOptionList)$/;
298 if ($element =~ /$endTags/) {
299 $currentIndex = 0;
300 $currentLevel = "";
301 }
302 # $arrayarrayexp contains classifier|plugin
303 elsif($element =~ /$arrayarrayexp/ ){
304 $currentIndex = $currentIndex + 1;
305 }
306}
307
308sub Text {
309 if (defined $currentLocation) {
310 #@ Handling block metadataList(creator, maintainer, public)
311 if($currentLocation =~ /$stringexp/){
312 #print $currentLocation;
313 my $key = $nameMap->{$currentLocation};
314 $data->{$key} = $_;
315 undef $currentLocation;
316 }
317
318 #@ Handling displayItem metadata that are children of displayItemList
319 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
320 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
321 my $lang = $currentAttrRef->{'lang'};
322 my $name = $currentAttrRef->{'name'};
323
324 # this is how data->collectionmeta's language is set in Greenstone 2.
325 # Need to be consistent, since export.pl accesses these values all in the same way
326 if(!defined $lang) {
327 $lang = 'default';
328 } else {
329 $lang = "[l=$lang]";
330 }
331
332 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
333 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
334 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
335 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
336 }
337 undef $currentLocation;
338 }
339
340 #@ Handling searchtype: plain,form; arrayexp
341 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
342 # map 'searchType' into 'searchtype'
343 my $key = $nameMap->{$currentLocation};
344 # split it by ','
345 my ($plain, $form) = split (",", $_);
346
347 if (!defined $data->{$key}) {
348 $data->{$key} = [];
349 }
350 if (defined $plain and $plain =~ /\w/) {
351 push @{ $data->{$key} }, $plain;
352 }
353 if (defined $form and $form =~ /\w/) {
354 push @{ $data->{$key} }, $form;
355 }
356 }
357 }
358}
359
360# This sub is for debugging purposes
361sub Display {
362 # metadataList
363 foreach my $k (keys %{$data}) {
364 print STDERR "*** metadatalist key $k\n";
365 }
366
367 print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
368 print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
369 print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
370 print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
371 print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
372 print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
373 print STDERR "*** orthogonal build types: ".join(",",$data->{"orthogonalbuildtypes"})."\n" if (defined $data->{"orthogonalbuildtypes"});
374 print STDERR "*** search types: \n";
375 print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
376 print STDERR "*** levels: \n";
377 print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
378 print STDERR "*** index subcollections: \n";
379 print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
380 print STDERR "*** indexes: \n";
381 print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
382 print STDERR "*** index options: \n";
383 print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
384 print STDERR "*** languages: \n";
385 print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
386 print STDERR "*** language metadata: \n";
387 print STDERR join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
388
389 print STDERR "*** Plugins: \n";
390 if (defined $data->{'plugin'}) {
391 foreach $a (@{$data->{'plugin'}}) {
392 print join(",",@$a);
393 print "\n";
394 }
395 }
396
397 #print STDERR "*** Build options: \n";
398 #if (defined $data->{'store_metadata_coverage'}) {
399 #foreach $a (@{$data->{'store_metadata_coverage'}}) {
400 # print join(",",@$a,@$_);
401 # print "\n";
402 #}
403 #}
404
405 if (defined $data->{'classify'}) {
406 print STDERR "*** Classifiers: \n";
407 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
408 }
409
410 if (defined $data->{'subcollection'}) {
411 foreach my $key (keys %{$data->{'subcollection'}}) {
412 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
413 }
414 }
415}
416# is this actually used??
417sub Doctype {
418 my ($expat, $name, $sysid, $pubid, $internal) = @_;
419
420 die if ($name !~ /^CollectionConfig$/);
421}
422
423# This Char function overrides the one in XML::Parser::Stream to overcome a
424# problem where $expat->{Text} is treated as the return value, slowing
425# things down significantly in some cases.
426sub Char {
427 if ($]<5.008) {
428 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
429 }
430 $_[0]->{'Text'} .= $_[1];
431 return undef;
432}
433
434
435
436
437#########################################################
438
4391;
Note: See TracBrowser for help on using the repository browser.