source: gsdl/trunk/perllib/collConfigxml.pm@ 20102

Last change on this file since 20102 was 20102, checked in by kjdon, 15 years ago

xiaofeng says that all the flax stuff here is old and not needed any more, so deleting it :-)

  • Property svn:keywords set to Author Date Id Revision
File size: 14.1 KB
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33# Wrapper that ensures the right version of XML::Parser is loaded given
34# the version of Perl being used. Need to distinguish between Perl 5.6 and
35# Perl 5.8
36sub BEGIN {
37 my $perl_dir;
38
39 # Note: $] encodes the version number of perl
40 if ($]>5.008) {
41 # perl 5.8.1 or above
42 $perl_dir = "perl-5.8";
43 }
44 elsif ($]<5.008) {
45 # assume perl 5.6
46 $perl_dir = "perl-5.6";
47 }
48 else {
49 print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
50 print STDERR " Please upgrade to a newer version of Perl.\n";
51 $perl_dir = "perl-5.8";
52 }
53
54 if ($ENV{'GSDLOS'} !~ /^windows$/i) {
55 # Use push to put this on the end, so an existing XML::Parser will be used by default
56 push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
57 }
58}
59
60use XML::Parser;
61
62# A mapping hash to resolve name discrepancy between gs2 and gs3.
63my $nameMap = {"key" => "value",
64 "creator" => "creator",
65 "maintainer" => "maintainer",
66 "public" => "public",
67 "infodb" => "infodbtype",
68 "defaultIndex" => "defaultindex",
69 "defaultLevel" => "defaultlevel",
70 "name" => "collectionname",
71 "description" => "collectionextra",
72 "smallicon" => "iconcollectionsmall",
73 "icon" => "iconcollection",
74 "level" => "levels",
75 "classifier" => "classify",
76 "indexSubcollection" => "indexsubcollections",
77 "indexLanguage" => "languages",
78 "defaultIndexLanguage" => "defaultlanguage",
79 "index" => "indexes",
80 "plugin" => "plugin",
81 "plugout" => "plugout",
82 "indexOption" => "indexoptions",
83 "searchType" => "searchtype",
84 "languageMetadata" => "languagemetadata",
85 "buildType" => "buildtype"
86 };
87# A hash structure which is returned by sub read_cfg_file.
88my $data = {};
89
90my $repeatedBlock = q/^(browse|pluginList)$/;
91
92# use those unique attribute values to locate the text within the elements
93# creator, public, maintainer and within a displayItem.
94my $currentLocation = "";
95my $stringexp = q/^(creator|maintainer|public|buildType)$/;
96my $displayItemNames = q/^(name|description)$/;
97
98# For storing the attributes during the StartTag subroutine, so that
99# we can use it later in Text (or EndTag) subroutines
100my $currentAttrRef = undef;
101
102my $currentLevel = "";
103
104# Count the elements with same name within the same block
105# ("plugin", "option")
106my $currentIndex = 0;
107my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
108my $arrayarrayexp= q/^(plugin|classifier)$/;
109my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
110my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
111
112my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
113
114# Reads in the model collection configuration file, collectionConfig.xml,
115# into a structure which complies with the one used by gs2 (i.e. one read
116# in by &cfgread::read_cfg_file).
117sub read_cfg_file {
118 my ($filename) = @_;
119 $data = {};
120 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
121 return undef;
122 }
123
124 # create XML::Parser object for parsing metadata.xml files
125 my $parser;
126 if ($]<5.008) {
127 # Perl 5.6
128 $parser = new XML::Parser('Style' => 'Stream',
129 'Handlers' => {'Char' => \&Char,
130 'Doctype' => \&Doctype
131 });
132 }
133 else {
134 # Perl 5.8
135 $parser = new XML::Parser('Style' => 'Stream',
136 'ProtocolEncoding' => 'ISO-8859-1',
137 'Handlers' => {'Char' => \&Char,
138 'Doctype' => \&Doctype
139 });
140 }
141
142 if (!open (COLCFG, $filename)) {
143 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
144 } else {
145
146 $parser->parsefile ($filename);# (COLCFG);
147 close (COLCFG);
148 }
149
150 #&Display;
151 return $data;
152}
153
154sub StartTag {
155# Those marked with #@ will not be executed at the same time when this sub is being called
156# so that if/elsif is used to avoid unnecessary tests
157 my ($expat, $element) = @_;
158
159 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
160 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
161 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
162 $currentAttrRef = \%_;
163
164 my $name = $_{'name'};
165 my $value = $_{'value'};
166 my $type = $_{'type'};
167
168 # for subcollections
169 my $filter = $_{'filter'};
170
171 #@ Marking repeated block
172 if ($element =~ /$repeatedBlock/) {
173 $currentIndex = 0;
174 }
175
176 #@ handling block metadataList
177 elsif (defined $name and $name =~ /$stringexp/){
178 $currentLocation = $name;
179 }
180 #@ handling default search index/level/indexLanguage and languageMetadata
181 elsif ($element =~ /$defaults/) {
182 if (defined $name and $name =~ /\w/) {
183 $data->{$nameMap->{$element}} = $name;
184 }
185 }
186
187 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
188 elsif($element eq "displayItemList") {
189 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
190 }
191 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
192 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
193 # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
194 $currentLocation = $name;
195 }
196 }
197
198 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
199 elsif ($element eq "infodb") {
200 $data->{'infodbtype'} = $type;
201 }
202
203 #@ Handling indexer: mgpp/mg/lucene; stringexp
204 elsif ($element eq "search") {
205 $data->{'buildtype'} = $type;
206 }
207
208 #@ Handling searchtype: plain,form; arrayexp
209 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
210 #@ Handling searchtype: plain, form
211 #$currentLocation = $name;
212 #}
213
214 #@ Handle index|level|indexSubcollection|indexLanguage
215 elsif ($element =~ /$arrayexp/) {
216 my $key = $nameMap->{$element};
217 if (!defined $data->{$key}) {
218 $data->{$key} = [];
219 }
220
221 push (@{$data->{$key}},$name);
222 }
223
224 #@ indexoptions: accentfold/casefold/stem; arrayexp
225 elsif ($element eq "indexOption") {
226 $currentLevel = "indexOption";
227 }
228 if ($currentLevel eq "indexOption" and $element eq "option") {
229 my $key = $nameMap->{$currentLevel};
230 if (!defined $data->{$key}) {
231 $data->{$key} = [];
232 }
233 push (@{$data->{$key}},$name);
234 }
235 #@ plugout options
236 elsif ($element eq "plugout") {
237 $currentLevel = "plugout";
238 my $key = $nameMap->{$currentLevel};
239 if (!defined $data->{$key}) {
240 $data->{$key} = [];
241 }
242 if(defined $name and $name ne ""){
243 push (@{$data->{$key}},$name);
244 }
245 else{
246 push (@{$data->{$key}},"GreenstoneXMLPlugout");
247 }
248 }
249 if ($currentLevel eq "plugout" and $element eq "option") {
250 my $key = $nameMap->{$currentLevel};
251 if (defined $name and $name ne ""){
252 push (@{$data->{$key}},$name);
253 }
254 if (defined $value and $value ne ""){
255 push (@{$data->{$key}},$value);
256 }
257 }
258
259 #@ use hash of hash of strings: hashexp
260 elsif ($element =~ /$hashexp/) {
261 if (!defined $data->{$element}) {
262 $data->{$element} = {};
263 }
264 if (defined $name and $name =~ /\w/) {
265 if (defined $filter and $filter =~ /\w/) {
266 $data->{$element}->{$name} = $filter;
267
268 }
269 }
270 }
271
272 #@ Handling each classifier/plugin element
273 elsif ($element =~ /$arrayarrayexp/) {
274 # find the gs2 mapping name
275 $currentLevel = $element;
276 my $key = $nameMap->{$element};
277
278 # define an array of array of strings foreach $k (@{$data->{$key}}) {
279 if (!defined $data->{$key}) {
280 $data->{$key} = [];
281 }
282 # Push classifier/plugin name (e.g. AZList) into $data as the first string
283 push (@{$data->{$key}->[$currentIndex]},$name);
284 #print $currentIndex."indexup\n";
285 }
286
287 #@ Handling the option elements in each classifier/plugin element (as the following strings)
288 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
289 # find the gs2 mapping name for classifier and plugin
290 my $key = $nameMap->{$currentLevel};
291
292 if (defined $name and $name =~ /\w/) {
293 push (@{$data->{$key}->[$currentIndex]}, $name);
294 }
295 if (defined $value and $value =~ /\w/) {
296 push (@{$data->{$key}->[$currentIndex]}, $value);
297 }
298
299 }
300
301}
302
303sub EndTag {
304 my ($expat, $element) = @_;
305 my $endTags = q/^(browse|pluginList|displayItemList)$/;
306 if ($element =~ /$endTags/) {
307 $currentIndex = 0;
308 $currentLevel = "";
309 }
310 # $arrayarrayexp contains classifier|plugin
311 elsif($element =~ /$arrayarrayexp/ ){
312 $currentIndex = $currentIndex + 1;
313 }
314}
315
316sub Text {
317 if (defined $currentLocation) {
318 #@ Handling block metadataList(creator, maintainer, public)
319 if($currentLocation =~ /$stringexp/){
320 #print $currentLocation;
321 my $key = $nameMap->{$currentLocation};
322 $data->{$key} = $_;
323 undef $currentLocation;
324 }
325
326 #@ Handling displayItem metadata that are children of displayItemList
327 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
328 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
329 my $lang = $currentAttrRef->{'lang'};
330 my $name = $currentAttrRef->{'name'};
331
332 # this is how data->collectionmeta's language is set in Greenstone 2.
333 # Need to be consistent, since export.pl accesses these values all in the same way
334 if(!defined $lang) {
335 $lang = 'default';
336 } else {
337 $lang = "[l=$lang]";
338 }
339
340 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
341 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
342 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
343 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
344 }
345 undef $currentLocation;
346 }
347
348 #@ Handling searchtype: plain,form; arrayexp
349 elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
350 # map 'searchType' into 'searchtype'
351 my $key = $nameMap->{$currentLocation};
352 # split it by ','
353 my ($plain, $form) = split (",", $_);
354
355 if (!defined $data->{$key}) {
356 $data->{$key} = [];
357 }
358 if (defined $plain and $plain =~ /\w/) {
359 push @{ $data->{$key} }, $plain;
360 }
361 if (defined $form and $form =~ /\w/) {
362 push @{ $data->{$key} }, $form;
363 }
364 }
365 }
366}
367
368# This sub is for debugging purposes
369sub Display {
370 # metadataList
371 foreach my $k (keys %{$data}) {
372 print STDERR "*** metadatalist key $k\n";
373 }
374
375 print $data->{'creator'}."\n" if (defined $data->{'creator'});
376 print $data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
377 print $data->{"public"}."\n" if (defined $data->{"public"});
378 print $data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
379 print $data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
380 print $data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
381 print join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
382 print join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
383 print join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
384 print join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
385 print join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
386 print join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
387 print join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
388
389 if (defined $data->{'plugin'}) {
390 foreach $a (@{$data->{'plugin'}}) {
391 print join(",",@$a);
392 print "\n";
393 }
394 }
395 if (defined $data->{'classify'}) {
396 print "Classifiers: \n";
397 map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
398 }
399
400 if (defined $data->{'subcollection'}) {
401 foreach my $key (keys %{$data->{'subcollection'}}) {
402 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
403 }
404 }
405}
406sub Doctype {
407 my ($expat, $name, $sysid, $pubid, $internal) = @_;
408
409 # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files
410 # to be processed as well as the "DirectoryMetadata" files which should now
411 # be created by import.pl
412 die if ($name !~ /^(Greenstone)?DirectoryMetadata$/);
413}
414
415# This Char function overrides the one in XML::Parser::Stream to overcome a
416# problem where $expat->{Text} is treated as the return value, slowing
417# things down significantly in some cases.
418sub Char {
419 if ($]<5.008) {
420 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
421 }
422 $_[0]->{'Text'} .= $_[1];
423 return undef;
424}
425
426
427
428
429#########################################################
430
4311;
Note: See TracBrowser for help on using the repository browser.