root/main/trunk/greenstone2/perllib/collConfigxml.pm @ 23895

Revision 23895, 14.2 KB (checked in by sjm84, 9 years ago)

Modified several Perl files to merge the locations where XML::Parser checked for the current version of perl into one location. Also tidied up several locations where a difference was specified between 5.6 and 5.8+ to do with ProtocolEncoding? being used to initialise an XML::Parser. Given the recent "

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use XMLParser;
34
35# A mapping hash to resolve name discrepancy between gs2 and gs3.
36my $nameMap = {"key" => "value",
37           "creator" => "creator",
38           "maintainer" => "maintainer",
39           "public" => "public",
40           "infodb" => "infodbtype",
41           "defaultIndex" => "defaultindex",
42           "defaultLevel" => "defaultlevel",
43           "name" => "collectionname",
44           "description" => "collectionextra",
45           "smallicon" => "iconcollectionsmall",
46           "icon" => "iconcollection",
47           "level" => "levels",
48           "classifier" => "classify",
49           "indexSubcollection" => "indexsubcollections",
50           "indexLanguage" => "languages",
51           "defaultIndexLanguage" => "defaultlanguage",
52           "index" => "indexes",
53           "plugin" => "plugin",
54           "plugout" => "plugout",
55           "indexOption" => "indexoptions",
56           "searchType" => "searchtype",
57           "languageMetadata" => "languagemetadata",
58           "buildType" => "buildtype",
59           "buildOption" => "buildOption"
60           };
61# A hash structure which is returned by sub read_cfg_file.
62my $data = {};
63
64my $repeatedBlock = q/^(browse|pluginList)$/; #|buildOptionList)$/;
65
66# use those unique attribute values to locate the text within the elements
67# creator, public, maintainer and within a displayItem.
68my $currentLocation = "";
69my $stringexp = q/^(creator|maintainer|public|buildType)$/;
70my $displayItemNames = q/^(name|description)$/;
71 
72# For storing the attributes during the StartTag subroutine, so that
73# we can use it later in Text (or EndTag) subroutines
74my $currentAttrRef = undef;
75
76my $currentLevel = "";
77
78# Count the elements with same name within the same block
79# ("plugin", "option")
80my $currentIndex = 0;
81my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
82my $arrayarrayexp= q/^(plugin|classifier)$/; #|buildOption)$/;
83my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
84my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
85
86my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
87
88# Reads in the model collection configuration file, collectionConfig.xml,
89# into a structure which complies with the one used by gs2 (i.e. one read
90# in by &cfgread::read_cfg_file).
91sub read_cfg_file {
92    my ($filename) = @_;
93    $data = {};
94    if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
95        return undef;
96    }
97
98    # Removed ProtocolEncoding (see MetadataXMLPlugin for details)
99
100    # create XML::Parser object for parsing metadata.xml files
101    my $parser = new XML::Parser('Style' => 'Stream',
102                 'Pkg' => 'collConfigxml',
103                 'Handlers' => {'Char' => \&Char,
104                         'Doctype' => \&Doctype
105                         });
106    if (!open (COLCFG, $filename)) {
107    print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
108    } else {
109
110      $parser->parsefile ($filename);# (COLCFG);
111      close (COLCFG);
112    }
113
114    #&Display;
115    return $data;
116}
117
118sub StartTag {
119# Those marked with #@ will not be executed at the same time when this sub is being called
120# so that if/elsif is used to avoid unnecessary tests
121    my ($expat, $element) = @_;
122   
123    # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
124    # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
125    # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
126    $currentAttrRef = \%_;
127
128    my $name = $_{'name'};
129    my $value = $_{'value'};
130    my $type = $_{'type'};
131
132    # for subcollections
133    my $filter = $_{'filter'};
134   
135    # was this just a flax thing??
136    my $assigned = $_{'assigned'};
137   
138    #@ Marking repeated block
139    if ($element =~ /$repeatedBlock/) {
140    $currentIndex = 0;
141    }
142
143    #@ handling block metadataList
144    elsif (defined $name and $name =~ /$stringexp/){
145      $currentLocation = $name;
146    }
147    #@ handling default search index/level/indexLanguage and languageMetadata
148    elsif ($element =~ /$defaults/) {
149      if (defined $name and $name =~ /\w/) {
150    $data->{$nameMap->{$element}} = $name;
151      }
152    }
153
154    #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
155    elsif($element eq "displayItemList") {
156    $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
157    }
158    elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
159    if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
160        # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
161        $currentLocation = $name;
162    }
163    }
164
165    #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
166    elsif ($element eq "infodb") {
167      $data->{'infodbtype'} = $type;
168    }
169   
170    #@ Handling indexer: mgpp/mg/lucene; stringexp
171    elsif ($element eq "search") {
172      $data->{'buildtype'} = $type;
173    }
174
175    #@ Handling searchtype: plain,form; arrayexp
176    #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
177    #@ Handling searchtype: plain, form
178    #$currentLocation = $name; 
179    #}
180 
181    #@ Handle index|level|indexSubcollection|indexLanguage
182    elsif ($element =~ /$arrayexp/) {
183      my $key = $nameMap->{$element};   
184      if (!defined $data->{$key}) {
185    $data->{$key} = [];
186      }
187
188      push (@{$data->{$key}},$name);
189    }
190
191    #*****************************************
192    elsif ($element eq "buildOption") {
193        print STDERR "**** BUILD OPTION PAIR $name $value\n";
194        $data->{$name} = $value;
195    }   
196
197
198    #@ indexoptions: accentfold/casefold/stem; arrayexp
199    elsif ($element eq "indexOption") {
200      $currentLevel = "indexOption";
201    }
202    if ($currentLevel eq "indexOption" and $element eq "option") {
203      my $key = $nameMap->{$currentLevel}; 
204      if (!defined $data->{$key}) {
205    $data->{$key} = [];
206      }
207      push (@{$data->{$key}},$name);
208    }
209    #@ plugout options
210    elsif ($element eq "plugout") {
211    $currentLevel = "plugout";
212    my $key = $nameMap->{$currentLevel};   
213    if (!defined $data->{$key}) {
214        $data->{$key} = [];
215    }
216    if(defined $name and $name ne ""){
217        push (@{$data->{$key}},$name);
218    }
219    else{
220       push (@{$data->{$key}},"GreenstoneXMLPlugout");
221    }
222    }
223    if ($currentLevel eq "plugout" and $element eq "option") {     
224    my $key = $nameMap->{$currentLevel};
225    if (defined $name and $name ne ""){
226        push (@{$data->{$key}},$name);
227    }
228    if (defined $value and $value ne  ""){
229        push (@{$data->{$key}},$value);
230    }
231    }
232
233    #@ use hash of hash of strings: hashexp
234    elsif ($element =~ /$hashexp/) {
235      if (!defined $data->{$element}) {
236    $data->{$element} = {};
237      }
238      if (defined $name and $name =~ /\w/) {
239    if (defined $filter and $filter =~ /\w/) {
240      $data->{$element}->{$name} = $filter;
241
242    }
243      }
244    }
245
246    #@ Handling each classifier/plugin element
247    elsif ($element =~ /$arrayarrayexp/) {
248    # find the gs2 mapping name
249        $currentLevel = $element;
250        my $key = $nameMap->{$element};
251
252    # define an array of array of strings   foreach $k (@{$data->{$key}}) {
253    if (!defined $data->{$key}) {
254      $data->{$key} = [];
255    }
256    # Push classifier/plugin name (e.g. AZList) into $data as the first string
257    push (@{$data->{$key}->[$currentIndex]},$name);
258    if (defined $value and $value =~ /\w/) {
259        push (@{$data->{$key}->[$currentIndex]}, $value);
260        print "$value\n";
261    }
262    #print $currentIndex."indexup\n";
263      }
264   
265    #@ Handling the option elements in each classifier/plugin element (as the following strings)
266    elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
267    # find the gs2 mapping name for classifier and plugin
268        my $key = $nameMap->{$currentLevel};   
269
270    if (defined $name and $name =~ /\w/) {
271        push (@{$data->{$key}->[$currentIndex]}, $name);
272    }
273    if (defined $value and $value =~ /\w/) {
274            push (@{$data->{$key}->[$currentIndex]}, $value);
275    }
276
277    }
278   
279}
280
281sub EndTag {
282    my ($expat, $element) = @_;
283    my $endTags = q/^(browse|pluginList|displayItemList)$/; #|buildOptionList)$/;   
284    if ($element =~ /$endTags/) {
285        $currentIndex = 0;
286        $currentLevel = "";
287    }
288    # $arrayarrayexp contains classifier|plugin
289    elsif($element =~ /$arrayarrayexp/ ){
290        $currentIndex = $currentIndex + 1;
291    }
292}
293
294sub Text {
295    if (defined $currentLocation) {
296    #@ Handling block metadataList(creator, maintainer, public)
297    if($currentLocation =~ /$stringexp/){
298        #print $currentLocation;
299        my $key = $nameMap->{$currentLocation};
300        $data->{$key} = $_;
301        undef $currentLocation;
302    }
303   
304    #@ Handling displayItem metadata that are children of displayItemList
305    # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
306    elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
307        my $lang = $currentAttrRef->{'lang'};
308        my $name = $currentAttrRef->{'name'};
309       
310        # this is how data->collectionmeta's language is set in Greenstone 2.
311        # Need to be consistent, since export.pl accesses these values all in the same way
312        if(!defined $lang) {
313        $lang = 'default';
314        } else {
315        $lang = "[l=$lang]";
316        }
317       
318        if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
319        # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
320        $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
321        #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
322        }
323        undef $currentLocation;
324    }
325 
326    #@ Handling searchtype: plain,form; arrayexp
327    elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
328        # map 'searchType' into 'searchtype'
329        my $key = $nameMap->{$currentLocation};
330        # split it by ','
331        my ($plain, $form) = split (",", $_);
332       
333        if (!defined $data->{$key}) {
334        $data->{$key} = [];
335        }
336        if (defined $plain and $plain =~ /\w/) {
337        push @{ $data->{$key} }, $plain;
338        }
339        if (defined $form and $form =~ /\w/) {
340        push @{ $data->{$key} }, $form;
341        }
342    }
343    }   
344}
345
346# This sub is for debugging purposes
347sub Display {
348    # metadataList
349    foreach my $k (keys %{$data}) {
350    print STDERR "*** metadatalist key $k\n";
351    }
352 
353    print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
354    print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
355    print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
356    print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
357    print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
358    print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
359    print STDERR "*** search types: \n";
360    print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
361    print STDERR "*** levels: \n";
362    print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
363    print STDERR "*** index subcollections: \n";
364    print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
365    print STDERR "*** indexes: \n";
366    print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
367    print STDERR "*** index options: \n";
368    print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
369    print STDERR "*** languages: \n";
370    print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
371    print STDERR "*** language metadata: \n";
372    print STDERR join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
373 
374    print STDERR "*** Plugins: \n";
375    if (defined $data->{'plugin'}) {
376    foreach $a (@{$data->{'plugin'}}) {
377        print join(",",@$a);
378        print "\n";
379    }
380    }
381
382    #print STDERR "*** Build options: \n";
383    #if (defined $data->{'store_metadata_coverage'}) {
384    #foreach $a (@{$data->{'store_metadata_coverage'}}) {
385    #    print join(",",@$a,@$_);
386    #    print "\n";
387    #}
388    #}
389
390    if (defined $data->{'classify'}) {
391    print STDERR "*** Classifiers: \n";
392    map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
393    }
394   
395    if (defined $data->{'subcollection'}) {
396    foreach my $key (keys %{$data->{'subcollection'}}) {
397        print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
398    }
399    }
400}
401# is this actually used??
402sub Doctype {
403    my ($expat, $name, $sysid, $pubid, $internal) = @_;
404
405    die if ($name !~ /^CollectionConfig$/);
406}
407
408# This Char function overrides the one in XML::Parser::Stream to overcome a
409# problem where $expat->{Text} is treated as the return value, slowing
410# things down significantly in some cases.
411sub Char {
412    if ($]<5.008) {
413    use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
414    }
415    $_[0]->{'Text'} .= $_[1];
416    return undef;
417}
418
419
420
421
422#########################################################
423
4241;
Note: See TracBrowser for help on using the browser.