root/main/trunk/greenstone2/perllib/collConfigxml.pm @ 28034

Revision 28034, 14.9 KB (checked in by kjdon, 6 years ago)

handling importOption and buildOption - they are not options in a importOptions list, just xxOptions at the top level

  • Property svn:keywords set to Author Date Id Revision
Line 
###########################################################################
#
# collConfigxml.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# Reads in collection configuration files written in XML form
# (collectionConfig.xml) and exposes them as a gs2-style hash.

package collConfigxml;
use strict;
no strict 'refs';
no strict 'subs';

use XMLParser;

# A mapping hash to resolve name discrepancies between gs2 and gs3.
# The key is the gs3 element (or attribute) name found in
# collectionConfig.xml; the value is the internal (gs2) option name used
# as the key in $data.
# NOTE(review): the "key" => "value" entry looks like a placeholder; it is
# kept unchanged because nothing in this file proves it is unused.
my $nameMap = {
    "key"                  => "value",
    "creator"              => "creator",
    "maintainer"           => "maintainer",
    "public"               => "public",
    "infodb"               => "infodbtype",
    "defaultIndex"         => "defaultindex",
    "defaultLevel"         => "defaultlevel",
    "name"                 => "collectionname",
    "description"          => "collectionextra",
    "smallicon"            => "iconcollectionsmall",
    "icon"                 => "iconcollection",
    "level"                => "levels",
    "classifier"           => "classify",
    "indexSubcollection"   => "indexsubcollections",
    "indexLanguage"        => "languages",
    "defaultIndexLanguage" => "defaultlanguage",
    "index"                => "indexes",
    "sort"                 => "sortfields",
    "facet"                => "facetfields",
    "plugin"               => "plugin",
    "plugout"              => "plugout",
    "indexOption"          => "indexoptions",
    "searchType"           => "searchtype",
    "languageMetadata"     => "languagemetadata",
    "buildType"            => "buildtype",
    "orthogonalBuildTypes" => "orthogonalbuildtypes",
};

# The hash structure that read_cfg_file fills in and returns.
my $data = {};

# All element-name patterns below are precompiled once with qr// and are
# interpolated unchanged into the matches done by the parser callbacks.

# Elements that open a block of repeated plugin/classifier children; seeing
# one of them restarts the $currentIndex counter.
my $repeatedBlock = qr/^(browse|pluginList)$/;

# Use these unique "name" attribute values to locate the text within the
# elements creator, public, maintainer (and buildType) and within a
# displayItem.  $currentLocation remembers which one the next text node
# belongs to.
my $currentLocation  = "";
my $stringexp        = qr/^(creator|maintainer|public|buildType)$/;
my $displayItemNames = qr/^(name|description)$/;

# These options get set at the top level of $data, as name => value.
my $topleveloptionexp = qr/^(importOption|buildOption)$/;

# For storing the attributes during the StartTag subroutine, so that
# we can use them later in the Text (or EndTag) subroutines.
my $currentAttrRef = undef;

# Name of the enclosing element whose <option> children (or text) are
# currently being collected: "plugin", "classifier", "plugout",
# "displayItemList", or "" for none.
my $currentLevel = "";

# Position of the current plugin/classifier within its block; it indexes
# the array of arrays stored under that element's key in $data.
my $currentIndex = 0;

# Elements collected as a flat array of their "name" attributes.
my $arrayexp = qr/^(index|sort|facet|level|indexOption|indexSubcollection|indexLanguage|orthogonalBuildTypes)$/;
# Elements collected as an array of arrays (name followed by options).
my $arrayarrayexp = qr/^(plugin|classifier)$/; #|buildOption)$/;
# Add other element names that should be represented by hashes here.
my $hashexp = qr/^(subcollection)$/;
# Add other (collectionmeta) element names that should be represented by
# hashes of hashes here.
my $hashhashexp = qr/^(displayItem)$/;

# Elements whose "name" attribute is stored directly as a scalar default.
my $defaults = qr/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;

# Reads in the model collection configuration file, collectionConfig.xml,
# into a structure which complies with the one used by gs2 (i.e. one read
# in by &cfgread::read_cfg_file).
#
# Parameters:
#   $filename - path of the file to parse; it must end in
#               "collectionConfig.xml" and name an existing plain file.
# Returns:
#   undef when $filename fails the check above; otherwise the hash ref that
#   the parser callbacks filled in.  If the file cannot be opened a message
#   is printed to STDERR and the (empty) hash ref is still returned.
# Any exception raised by the XML parser propagates to the caller.
sub read_cfg_file {
    my ($filename) = @_;

    # Start every parse from a clean slate.  The callbacks keep their state
    # in file-level variables, so without this a previous parse (especially
    # one that died half way through) would leak into this one.
    $data            = {};
    $currentLocation = "";
    $currentLevel    = "";
    $currentIndex    = 0;
    $currentAttrRef  = undef;

    if (!defined $filename || $filename !~ /collectionConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # Removed ProtocolEncoding (see MetadataXMLPlugin for details)

    # Create the XML::Parser object.  The Stream style dispatches to this
    # package's StartTag/EndTag/Text subs; Char and Doctype are overridden
    # explicitly.
    my $parser = XML::Parser->new(
        'Style'    => 'Stream',
        'Pkg'      => 'collConfigxml',
        'Handlers' => {
            'Char'    => \&Char,
            'Doctype' => \&Doctype,
        },
    );

    # Probe readability first so that an unreadable file produces a message
    # instead of an exception from the parser.
    if (open(my $colcfg, '<', $filename)) {
        close($colcfg);
        $parser->parsefile($filename);
    }
    else {
        print STDERR "collConfigxml::read_cfg_file couldn't read the cfg file $filename: $!\n";
    }

    #&Display;
    return $data;
}

# Stream-style start-tag callback: records what an opening element
# contributes to $data and updates the parser state ($currentLocation,
# $currentLevel, $currentIndex, $currentAttrRef) used by Text and EndTag.
#
# Parameters:
#   $expat   - the parser object (unused here).
#   $element - name of the element being opened.
# The element's attributes are read from the global hash %_.
#
# Branches marked with #@ are mutually exclusive, so if/elsif is used to
# avoid unnecessary tests.  Note that there are TWO chains: the second one
# (starting at the plugout <option> test) is evaluated for every element,
# whatever the first chain did.
sub StartTag {
    my ($expat, $element) = @_;

    # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
    # %_ is a hash of all the attributes of this element; keep a reference to
    # it so the attributes are still reachable when the text content of the
    # element is handled in the Text subroutine.
    $currentAttrRef = \%_;

    my $name       = $_{'name'};
    my $value      = $_{'value'};
    my $type       = $_{'type'};
    my $orthogonal = $_{'orthogonal'};

    # for subcollections
    my $filter = $_{'filter'};

    # was this just a flax thing??
    my $assigned = $_{'assigned'};

    # ---------------------------------------------------------------
    # First chain
    # ---------------------------------------------------------------

    #@ Marking repeated block
    if ($element =~ /$repeatedBlock/) {
        $currentIndex = 0;
    }

    #@ handling block metadataList: any element whose name attribute is
    #@ creator/maintainer/public/buildType; its text is picked up by Text
    elsif (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
    }

    #@ handling default search index/level/indexLanguage and languageMetadata
    elsif ($element =~ /$defaults/) {
        if (defined $name and $name =~ /\w/) {
            $data->{$nameMap->{$element}} = $name;
        }
    }

    #@ handling the displayItems name and description (known as
    #@ collectionname and collectionextra in GS2)
    elsif ($element eq "displayItemList") {
        # storing the parent if it is displayItemList
        $currentLevel = "displayItemList";
    }
    elsif ($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
        # either when there is no "assigned" attribute, or when
        # assigned=true (for displayItems)
        if (!defined $assigned || $assigned eq "true") {
            $currentLocation = $name;
        }
    }

    #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
    elsif ($element eq "infodb") {
        $data->{'infodbtype'} = $type;
    }

    #@ Handling indexer: mgpp/mg/lucene; stringexp
    #@ Handling orthogonal indexers: audioDB; arrayexp
    elsif ($element eq "search") {
        if ((defined $orthogonal) && ($orthogonal =~ m/^(true|on|1)$/i)) {
            push(@{$data->{'orthogonalbuildtypes'}}, $type);
        }
        else {
            $data->{'buildtype'} = $type;
        }
    }

    elsif ($element eq "store_metadata_coverage") {
        $data->{'store_metadata_coverage'} = $value;
    }

    #@ Handling searchtype: plain,form; arrayexp
    #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
    #@ Handling searchtype: plain, form
    #$currentLocation = $name;
    #}

    #@ Handle index|sort|facet|level|indexOption|indexSubcollection|indexLanguage
    elsif ($element =~ /$arrayexp/) {
        my $key = $nameMap->{$element};
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        if (defined $name) {
            push(@{$data->{$key}}, $name);
        }
    }

    # importOption and buildOption, just stored at top level, name=value,
    # as per gs2 version
    elsif ($element =~ /$topleveloptionexp/) {
        if (defined $name) {
            if (!defined $value) {
                # flag option, set to true
                $value = "true";
            }
            $data->{$name} = $value;
        }
    }

    #@ plugout options: the plugout name comes first in the flat list,
    #@ defaulting to GreenstoneXMLPlugout when none is given
    elsif ($element eq "plugout") {
        $currentLevel = "plugout";
        my $key = $nameMap->{$currentLevel};
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        if (defined $name and $name ne "") {
            push(@{$data->{$key}}, $name);
        }
        else {
            push(@{$data->{$key}}, "GreenstoneXMLPlugout");
        }
    }

    # ---------------------------------------------------------------
    # Second chain.  This deliberately stays a separate "if": it runs even
    # when a branch above already matched, so an <option> whose name
    # attribute happens to be creator/maintainer/public/buildType is still
    # appended to its plugout/plugin/classifier.
    # ---------------------------------------------------------------

    #@ option elements inside a plugout are appended to the flat plugout list
    if ($currentLevel eq "plugout" and $element eq "option") {
        my $key = $nameMap->{$currentLevel};
        if (defined $name and $name ne "") {
            push(@{$data->{$key}}, $name);
        }
        if (defined $value and $value ne "") {
            push(@{$data->{$key}}, $value);
        }
    }

    #@ use hash of hash of strings: hashexp
    elsif ($element =~ /$hashexp/) {
        if (!defined $data->{$element}) {
            $data->{$element} = {};
        }
        # only stored when both the name and the filter are non-blank
        if (defined $name and $name =~ /\w/) {
            if (defined $filter and $filter =~ /\w/) {
                $data->{$element}->{$name} = $filter;
            }
        }
    }

    #@ Handling each classifier/plugin element
    elsif ($element =~ /$arrayarrayexp/) {
        # find the gs2 mapping name
        $currentLevel = $element;
        my $key = $nameMap->{$element};

        # define an array of array of strings
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }

        # Push classifier/plugin name (e.g. AZList) into $data as the first
        # string of the entry at $currentIndex
        push(@{$data->{$key}->[$currentIndex]}, $name);
        if (defined $value and $value =~ /\w/) {
            push(@{$data->{$key}->[$currentIndex]}, $value);
        }
    }

    #@ Handling the option elements in each classifier/plugin element (as
    #@ the following strings)
    elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
        # find the gs2 mapping name for classifier and plugin
        my $key = $nameMap->{$currentLevel};

        if (defined $name and $name =~ /\w/) {
            push(@{$data->{$key}->[$currentIndex]}, $name);
        }
        if (defined $value and $value =~ /\w/) {
            push(@{$data->{$key}->[$currentIndex]}, $value);
        }
    }
}

# Stream-style end-tag callback: winds the parser state back when a block
# is closed.
#
# Parameters:
#   $expat   - the parser object (unused here).
#   $element - name of the element being closed.
sub EndTag {
    my ($expat, $element) = @_;

    # Closing one of these ends the current block: restart the
    # plugin/classifier counter and forget the enclosing level.
    my $endTags = q/^(browse|pluginList|displayItemList|indexOption)$/; #|buildOptionList)$/;
    if ($element =~ /$endTags/) {
        $currentIndex = 0;
        $currentLevel = "";
    }

    # $arrayarrayexp contains classifier|plugin: the next one in the same
    # block goes into the next slot of the array of arrays.
    elsif ($element =~ /$arrayarrayexp/) {
        $currentIndex = $currentIndex + 1;
    }

    # Leave the plugout level once the plugout is closed, so that a later
    # <option> element is not appended to the plugout's option list.
    elsif ($element eq "plugout") {
        $currentLevel = "";
    }
}

# Stream-style text callback.  The text just read is in $_; where it is
# stored depends on $currentLocation, which StartTag set for the element
# the text belongs to.  Nothing happens while $currentLocation is undefined.
sub Text {
    return unless defined $currentLocation;

    #@ Handling block metadataList (creator, maintainer, public, buildType):
    #@ the text becomes the value, and the location is consumed.
    if ($currentLocation =~ /$stringexp/) {
        $data->{ $nameMap->{$currentLocation} } = $_;
        undef $currentLocation;
        return;
    }

    #@ Handling displayItem metadata that are children of displayItemList:
    #@ the collection's name and possibly its description
    #@ ('collectionextra' in GS2).
    if ($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
        my $itemName = $currentAttrRef->{'name'};
        my $itemLang = $currentAttrRef->{'lang'};

        # This is how data->collectionmeta's language is set in Greenstone 2.
        # Need to be consistent, since export.pl accesses these values all
        # in the same way.
        my $langKey = defined $itemLang ? "[l=$itemLang]" : 'default';

        # attribute name = 'name' || 'description', which $nameMap turns
        # into 'collectionname' and 'collectionextra' respectively
        if (defined $itemName and $itemName =~ /$displayItemNames/) {
            $data->{'collectionmeta'}->{ $nameMap->{$itemName} }->{$langKey} = $_;
        }
        undef $currentLocation;
        return;
    }

    #@ Handling searchtype: plain,form; arrayexp
    if ($currentLocation =~ /searchType/) {
        # map 'searchType' into 'searchtype'
        my $key = $nameMap->{$currentLocation};

        # only the first two comma-separated pieces are considered
        my ($first, $second) = split(",", $_);

        $data->{$key} = [] unless defined $data->{$key};
        foreach my $piece ($first, $second) {
            push @{ $data->{$key} }, $piece
                if defined $piece and $piece =~ /\w/;
        }
    }
}

# This sub is for debugging purposes: it dumps the contents of $data
# (as filled in by the last read_cfg_file call) to STDERR.
sub Display {
    # every top-level key that was collected
    foreach my $k (keys %{$data}) {
        print STDERR "*** metadatalist key $k\n";
    }

    # scalar settings
    print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
    print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
    print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
    print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
    print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
    print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});

    # orthogonalbuildtypes holds an array ref, so it has to be dereferenced
    # before joining
    print STDERR "*** orthogonal build types: ".join(",",@{$data->{"orthogonalbuildtypes"}})."\n" if (defined $data->{"orthogonalbuildtypes"});

    # flat lists
    print STDERR "*** search types: \n";
    print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
    print STDERR "*** levels: \n";
    print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
    print STDERR "*** index subcollections: \n";
    print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
    print STDERR "*** indexes: \n";
    print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
    print STDERR "*** index options: \n";
    print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
    print STDERR "*** languages: \n";
    print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});

    # languagemetadata is stored by StartTag as a plain string, not a list
    print STDERR "*** language metadata: \n";
    print STDERR $data->{'languagemetadata'}."\n" if (defined $data->{'languagemetadata'});

    # one line per plugin: its name followed by its options
    print STDERR "*** Plugins: \n";
    if (defined $data->{'plugin'}) {
        foreach my $plugin (@{$data->{'plugin'}}) {
            print STDERR join(",",@$plugin)."\n";
        }
    }

    #print STDERR "*** Build options: \n";
    #if (defined $data->{'store_metadata_coverage'}) {
    #foreach $a (@{$data->{'store_metadata_coverage'}}) {
    #    print join(",",@$a,@$_);
    #    print "\n";
    #}
    #}

    # one line per classifier: its name followed by its options
    if (defined $data->{'classify'}) {
        print STDERR "*** Classifiers: \n";
        foreach my $classifier (@{$data->{'classify'}}) {
            print STDERR join(",",@$classifier)."\n";
        }
    }

    # subcollection name followed by its filter
    if (defined $data->{'subcollection'}) {
        foreach my $key (keys %{$data->{'subcollection'}}) {
            print STDERR "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
        }
    }
}
# Doctype handler: refuses any document whose DOCTYPE name is not
# CollectionConfig.  Only invoked when the document has a DOCTYPE
# declaration at all.
# (is this actually used??)
#
# Parameters: the parser object, the DOCTYPE name, system id, public id and
# internal subset; only the name is examined.
# Dies, naming the offending DOCTYPE, when the name does not match.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    return if (defined $name && $name =~ /^CollectionConfig$/);

    my $found = defined $name ? $name : '';
    die "collConfigxml: unexpected DOCTYPE '$found' (expected 'CollectionConfig')";
}

# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
#
# Parameters (read straight from @_ to avoid copying):
#   $_[0] - the parser object; the text seen so far is kept in its 'Text' slot.
#   $_[1] - the chunk of character data just parsed.
# Returns nothing, on purpose (see above).
#
# An earlier version wrapped "use bytes" in "if ($] < 5.008) { ... }".
# "use bytes" is a compile-time, lexically scoped pragma, so it only covered
# that otherwise empty block and never the append below; it has been dropped
# without changing behaviour.
sub Char {
    $_[0]->{'Text'} .= $_[1];
    return;
}




#########################################################

1;
Note: See TracBrowser for help on using the browser.