source: gsdl/trunk/perllib/collConfigxml.pm@ 20099

Last change on this file since 20099 was 20099, checked in by kjdon, 15 years ago

removed the buildConfig stuff

  • Property svn:keywords set to Author Date Id Revision
File size: 15.0 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33# Wrapper that ensures the right version of XML::Parser is loaded given
34# the version of Perl being used. Need to distinguish between Perl 5.6 and
35# Perl 5.8
36sub BEGIN {
37 my $perl_dir;
38
39 # Note: $] encodes the version number of perl
40 if ($]>5.008) {
41 # perl 5.8.1 or above
42 $perl_dir = "perl-5.8";
43 }
44 elsif ($]<5.008) {
45 # assume perl 5.6
46 $perl_dir = "perl-5.6";
47 }
48 else {
49 print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
50 print STDERR " Please upgrade to a newer version of Perl.\n";
51 $perl_dir = "perl-5.8";
52 }
53
54 if ($ENV{'GSDLOS'} !~ /^windows$/i) {
55 # Use push to put this on the end, so an existing XML::Parser will be used by default
56 push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
57 }
58}
59
60use XML::Parser;
61
62# A mapping hash to resolve name discrepancy between gs2 and gs3.
63my $nameMap = {"key" => "value",
64 "creator" => "creator",
65 "maintainer" => "maintainer",
66 "public" => "public",
67 "infodb" => "infodbtype",
68 "defaultIndex" => "defaultindex",
69 "defaultLevel" => "defaultlevel",
70 "name" => "collectionname",
71 "description" => "collectionextra",
72 "smallicon" => "iconcollectionsmall",
73 "icon" => "iconcollection",
74 "level" => "levels",
75 "classifier" => "classify",
76 "indexSubcollection" => "indexsubcollections",
77 "indexLanguage" => "languages",
78 "defaultIndexLanguage" => "defaultlanguage",
79 "index" => "indexes",
80 "plugin" => "plugin",
81 "plugout" => "plugout",
82 "indexOption" => "indexoptions",
83 "searchType" => "searchtype",
84 "languageMetadata" => "languagemetadata",
85 "buildType" => "buildtype"
86 };
87# A hash structure which is returned by sub read_cfg_file.
88my $data = {};
89
90my $repeatedBlock = q/^(browse|pluginList)$/;
91
92# use those unique attribute values to locate the text within the elements
93# creator, public, maintainer and within a displayItem.
94my $currentLocation = "";
95my $stringexp = q/^(creator|maintainer|public|buildType)$/;
96my $displayItemNames = q/^(name|description)$/;
97
98# For storing the attributes during the StartTag subroutine, so that
99# we can use it later in Text (or EndTag) subroutines
100my $currentAttrRef = undef;
101
102my $currentLevel = "";
103
104# Count the elements with same name within the same block
105# ("plugin", "option")
106my $currentIndex = 0;
107my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
108my $arrayarrayexp= q/^(plugin|classifier)$/;
109my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
110my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
111
112my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
113
114# Reads in the model collection configuration file, collectionConfig.xml,
115# into a structure which complies with the one used by gs2 (i.e. one read
116# in by &cfgread::read_cfg_file).
117sub read_cfg_file {
118 my ($filename) = @_;
119 $data = {};
120 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
121 return undef;
122 }
123
124 # create XML::Parser object for parsing metadata.xml files
125 my $parser;
126 if ($]<5.008) {
127 # Perl 5.6
128 $parser = new XML::Parser('Style' => 'Stream',
129 'Handlers' => {'Char' => \&Char,
130 'Doctype' => \&Doctype
131 });
132 }
133 else {
134 # Perl 5.8
135 $parser = new XML::Parser('Style' => 'Stream',
136 'ProtocolEncoding' => 'ISO-8859-1',
137 'Handlers' => {'Char' => \&Char,
138 'Doctype' => \&Doctype
139 });
140 }
141
142 if (!open (COLCFG, $filename)) {
143 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
144 } else {
145
146 $parser->parsefile ($filename);# (COLCFG);
147 close (COLCFG);
148 }
149
150 #&Display;
151 return $data;
152}
153
154sub StartTag {
155# Those marked with #@ will not be executed at the same time when this sub is being called
156# so that if/elsif is used to avoid unnecessary tests
157 my ($expat, $element) = @_;
158
159 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
160 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
161 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
162 $currentAttrRef = \%_;
163
164 my $name = $_{'name'};
165 my $value = $_{'value'};
166 my $type = $_{'type'};
167
168 # for subcollections
169 my $filter = $_{'filter'};
170
171 # for flax activities
172 my $desid = $_{'desid'};
173 my $assigned = $_{'assigned'};
174 my $lang = $_{'lang'};
175
176 #@ Marking repeated block
177 if ($element =~ /$repeatedBlock/) {
178 $currentIndex = 0;
179 }
180
181 #@ handling block metadataList
182 elsif (defined $name and $name =~ /$stringexp/){
183 $currentLocation = $name;
184 }
185 #@ handling default search index/level/indexLanguage and languageMetadata
186 elsif ($element =~ /$defaults/) {
187 if (defined $name and $name =~ /\w/) {
188 $data->{$nameMap->{$element}} = $name;
189 }
190 }
191
192 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
193 elsif($element eq "displayItemList") {
194 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
195 }
196 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
197 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
198 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
199 $currentLocation = $name;
200 }
201 }
202
203 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
204 elsif ($element eq "infodb") {
205 $data->{'infodbtype'} = $type;
206 }
207
208 #@ Handling indexer: mgpp/mg/lucene; stringexp
209 elsif ($element eq "search") {
210 $data->{'buildtype'} = $type;
211 }
212
213 #@ Handling searchtype: plain,form; arrayexp
214 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
215 #@ Handling searchtype: plain, form
216 #$currentLocation = $name;
217 #}
218
219 #@ Handle index|level|indexSubcollection|indexLanguage
220 elsif ($element =~ /$arrayexp/) {
221 my $key = $nameMap->{$element};
222 if (!defined $data->{$key}) {
223 $data->{$key} = [];
224 }
225
226 push (@{$data->{$key}},$name);
227 }
228
229 #@ indexoptions: accentfold/casefold/stem; arrayexp
230 elsif ($element eq "indexOption") {
231 $currentLevel = "indexOption";
232 }
233 if ($currentLevel eq "indexOption" and $element eq "option") {
234 my $key = $nameMap->{$currentLevel};
235 if (!defined $data->{$key}) {
236 $data->{$key} = [];
237 }
238 push (@{$data->{$key}},$name);
239 }
240 #@ plugout options
241 elsif ($element eq "plugout") {
242 $currentLevel = "plugout";
243 my $key = $nameMap->{$currentLevel};
244 if (!defined $data->{$key}) {
245 $data->{$key} = [];
246 }
247 if(defined $name and $name ne ""){
248 push (@{$data->{$key}},$name);
249 }
250 else{
251 push (@{$data->{$key}},"GreenstoneXMLPlugout");
252 }
253 }
254 if ($currentLevel eq "plugout" and $element eq "option") {
255 my $key = $nameMap->{$currentLevel};
256 if (defined $name and $name ne ""){
257 push (@{$data->{$key}},$name);
258 }
259 if (defined $value and $value ne ""){
260 push (@{$data->{$key}},$value);
261 }
262 }
263
264 #@ use hash of hash of strings: hashexp
265 elsif ($element =~ /$hashexp/) {
266 if (!defined $data->{$element}) {
267 $data->{$element} = {};
268 }
269 if (defined $name and $name =~ /\w/) {
270 if (defined $filter and $filter =~ /\w/) {
271 $data->{$element}->{$name} = $filter;
272
273 }
274 }
275 }
276
277 #@ Handling each classifier/plugin element
278 elsif ($element =~ /$arrayarrayexp/) {
279 # find the gs2 mapping name
280 $currentLevel = $element;
281 my $key = $nameMap->{$element};
282
283 # define an array of array of strings foreach $k (@{$data->{$key}}) {
284 if (!defined $data->{$key}) {
285 $data->{$key} = [];
286 }
287 # Push classifier/plugin name (e.g. AZList) into $data as the first string
288 push (@{$data->{$key}->[$currentIndex]},$name);
289 #print $currentIndex."indexup\n";
290 }
291
292 #@ Handling the option elements in each classifier/plugin element (as the following strings)
293 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
294 # find the gs2 mapping name for classifier and plugin
295 my $key = $nameMap->{$currentLevel};
296
297 if (defined $name and $name =~ /\w/) {
298 push (@{$data->{$key}->[$currentIndex]}, $name);
299 }
300 if (defined $value and $value =~ /\w/) {
301 push (@{$data->{$key}->[$currentIndex]}, $value);
302 }
303
304 }
305 #@ Handling each flaxActivity element (arrayarrayexp)
306 elsif ($element eq "flaxActivity") {
307 if (!defined $data->{'flaxActivity'}) {
308 $data->{'flaxActivity'} = [];
309 }
310 if(defined $assigned and $assigned =~ /\w/ and $assigned eq "true") {
311 if (defined $name and $name =~ /\w/) {
312 push (@{$data->{'flaxActivity'}->[$currentIndex]}, 'name');
313 push (@{$data->{'flaxActivity'}->[$currentIndex]}, $name);
314 }
315
316 if (defined $desid and $desid =~ /\w/) {
317 push (@{$data->{'flaxActivity'}->[$currentIndex]}, 'desid');
318 push (@{$data->{'flaxActivity'}->[$currentIndex]}, $desid);
319 }
320
321 if (defined $lang and $lang =~ /\w/) {
322 push (@{$data->{'flaxActivity'}->[$currentIndex]}, 'lang');
323 push (@{$data->{'flaxActivity'}->[$currentIndex]}, $lang);
324 }
325 }
326 }
327}
328
329sub EndTag {
330 my ($expat, $element) = @_;
331 my $endTags = q/^(browse|pluginList|displayItemList)$/;
332 if ($element =~ /$endTags/) {
333 $currentIndex = 0;
334 $currentLevel = "";
335 }
336 # $arrayarrayexp contains classifier|plugin
337 elsif($element =~ /$arrayarrayexp/ || $element eq "flaxActivity"){
338 $currentIndex = $currentIndex + 1;
339 }
340}
341
342sub Text {
343 if (defined $currentLocation) {
344 #@ Handling block metadataList(creator, maintainer, public)
345 if($currentLocation =~ /$stringexp/){
346 #print $currentLocation;
347 my $key = $nameMap->{$currentLocation};
348 $data->{$key} = $_;
349 undef $currentLocation;
350 }
351
352 #@ Handling displayItem metadata that are children of displayItemList
353 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
354 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
355 my $lang = $currentAttrRef->{'lang'};
356 my $name = $currentAttrRef->{'name'};
357
358 # this is how data->collectionmeta's language is set in Greenstone 2.
359 # Need to be consistent, since export.pl accesses these values all in the same way
360 if(!defined $lang) {
361 $lang = 'default';
362 } else {
363 $lang = "[l=$lang]";
364 }
365
366 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
367 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
368 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
369 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
370 }
371 undef $currentLocation;
372 }
373
374 #@ Handling searchtype: plain,form; arrayexp
375 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
376 # map 'searchType' into 'searchtype'
377 my $key = $nameMap->{$currentLocation};
378 # split it by ','
379 my ($plain, $form) = split (",", $_);
380
381 if (!defined $data->{$key}) {
382 $data->{$key} = [];
383 }
384 if (defined $plain and $plain =~ /\w/) {
385 push @{ $data->{$key} }, $plain;
386 }
387 if (defined $form and $form =~ /\w/) {
388 push @{ $data->{$key} }, $form;
389 }
390 }
391 }
392}
393
394# This sub is for debugging purposes
395sub Display {
396 # metadataList
397 foreach my $k (keys %{$data}) {
398 print STDERR "*** metadatalist key $k\n";
399 }
400
401 print $data->{'creator'}."\n" if (defined $data->{'creator'});
402 print $data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
403 print $data->{"public"}."\n" if (defined $data->{"public"});
404 print $data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
405 print $data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
406 print $data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
407 print join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
408 print join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
409 print join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
410 print join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
411 print join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
412 print join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
413 print join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
414
415 if (defined $data->{'plugin'}) {
416 foreach $a (@{$data->{'plugin'}}) {
417 print join(",",@$a);
418 print "\n";
419 }
420 }
421 if (defined $data->{'classify'}) {
422 print "Classifiers: \n";
423 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
424 }
425
426 if (defined $data->{'subcollection'}) {
427 foreach my $key (keys %{$data->{'subcollection'}}) {
428 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
429 }
430 }
431}
432sub Doctype {
433 my ($expat, $name, $sysid, $pubid, $internal) = @_;
434
435 # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files
436 # to be processed as well as the "DirectoryMetadata" files which should now
437 # be created by import.pl
438 die if ($name !~ /^(Greenstone)?DirectoryMetadata$/);
439}
440
441# This Char function overrides the one in XML::Parser::Stream to overcome a
442# problem where $expat->{Text} is treated as the return value, slowing
443# things down significantly in some cases.
444sub Char {
445 if ($]<5.008) {
446 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
447 }
448 $_[0]->{'Text'} .= $_[1];
449 return undef;
450}
451
452
453
454
455#########################################################
456
4571;
Note: See TracBrowser for help on using the repository browser.