Changeset 20105
- Timestamp:
- 2009-07-29T14:57:54+12:00 (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/buildConfigxml.pm
r20102 r20105 24 24 ########################################################################### 25 25 26 # reads in configuration files of xml form 26 # reads in buildConfig.xml 27 # Note, only implemented the bits that are currently used, eg by incremental 28 # build code. 29 # The resulting data is not a full representation on buildConfig.xml. 27 30 28 31 package buildConfigxml; 32 29 33 use strict; 30 34 no strict 'refs'; … … 60 64 use XML::Parser; 61 65 66 62 67 # A mapping hash to resolve name discrepancy between gs2 and gs3. 63 my $nameMap = {"key" => "value", 64 "creator" => "creator", 65 "maintainer" => "maintainer", 66 "public" => "public", 67 "infodb" => "infodbtype", 68 "defaultIndex" => "defaultindex", 69 "defaultLevel" => "defaultlevel", 70 "name" => "collectionname", 71 "description" => "collectionextra", 72 "smallicon" => "iconcollectionsmall", 73 "icon" => "iconcollection", 74 "level" => "levels", 75 "classifier" => "classify", 76 "indexSubcollection" => "indexsubcollections", 77 "indexLanguage" => "languages", 78 "defaultIndexLanguage" => "defaultlanguage", 79 "index" => "indexes", 80 "plugin" => "plugin", 81 "plugout" => "plugout", 82 "indexOption" => "indexoptions", 83 "searchType" => "searchtype", 84 "languageMetadata" => "languagemetadata", 68 my $nameMap = {"numDocs" => "numdocs", 85 69 "buildType" => "buildtype" 86 70 }; 71 72 87 73 # A hash structure which is returned by sub read_cfg_file. 88 74 my $data = {}; 89 75 90 my $repeatedBlock = q/^(browse|pluginList)$/;91 92 76 # use those unique attribute values to locate the text within the elements 93 # creator, public, maintainer and within a displayItem.94 77 my $currentLocation = ""; 95 my $stringexp = q/^(creator|maintainer|public|buildType)$/; 96 my $displayItemNames = q/^(name|description)$/; 78 my $stringexp = q/^(buildType|numDocs)$/; 97 79 98 # For storing the attributes during the StartTag subroutine, so that 99 # we can use it later in Text (or EndTag) subroutines 100 my $currentAttrRef = undef; 101 102 my $currentLevel = ""; 103 104 # Count the elements with same name within the same block 105 # ("plugin", "option") 106 my $currentIndex = 0; 107 my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/; 108 my $arrayarrayexp= q/^(plugin|classifier)$/; 109 my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here 110 my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here. 111 112 my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/; 113 114 sub StartTag { 115 # Those marked with #@ will not be executed at the same time when this sub is being called 116 # so that if/elsif is used to avoid unnecessary tests 117 my ($expat, $element) = @_; 118 119 # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream 120 # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes 121 # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text). 122 $currentAttrRef = \%_; 123 124 my $name = $_{'name'}; 125 my $value = $_{'value'}; 126 my $type = $_{'type'}; 127 128 # for subcollections 129 my $filter = $_{'filter'}; 130 131 #@ Marking repeated block 132 if ($element =~ /$repeatedBlock/) { 133 $currentIndex = 0; 134 } 135 136 #@ handling block metadataList 137 elsif (defined $name and $name =~ /$stringexp/){ 138 $currentLocation = $name; 139 } 140 #@ handling default search index/level/indexLanguage and languageMetadata 141 elsif ($element =~ /$defaults/) { 142 if (defined $name and $name =~ /\w/) { 143 $data->{$nameMap->{$element}} = $name; 144 } 145 } 146 147 #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2) 148 elsif($element eq "displayItemList") { 149 $currentLevel = "displayItemList"; # storing the parent if it is displayItemList 150 } 151 elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements 152 if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) { 153 # either when there is no "assigned" attribute, or when assigned=true (for displayItems): 154 $currentLocation = $name; 155 } 156 } 157 158 #@ Handling database type: gdbm or gdbm-txtgz, later jdbm. 159 elsif ($element eq "infodb") { 160 $data->{'infodbtype'} = $type; 161 } 162 163 #@ Handling indexer: mgpp/mg/lucene; stringexp 164 elsif ($element eq "search") { 165 $data->{'buildtype'} = $type; 166 } 167 168 #@ Handling searchtype: plain,form; arrayexp 169 #elsif ($element eq "format" and defined $name and $name =~ /searchType/) { 170 #@ Handling searchtype: plain, form 171 #$currentLocation = $name; 172 #} 173 174 #@ Handle index|level|indexSubcollection|indexLanguage 175 elsif ($element =~ /$arrayexp/) { 176 my $key = $nameMap->{$element}; 177 if (!defined $data->{$key}) { 178 $data->{$key} = []; 179 } 180 181 push (@{$data->{$key}},$name); 182 } 183 184 #@ indexoptions: accentfold/casefold/stem; arrayexp 185 elsif ($element eq "indexOption") { 186 $currentLevel = "indexOption"; 187 } 188 if ($currentLevel eq "indexOption" and $element eq "option") { 189 my $key = $nameMap->{$currentLevel}; 190 if (!defined $data->{$key}) { 191 $data->{$key} = []; 192 } 193 push (@{$data->{$key}},$name); 194 } 195 #@ plugout options 196 elsif ($element eq "plugout") { 197 $currentLevel = "plugout"; 198 my $key = $nameMap->{$currentLevel}; 199 if (!defined $data->{$key}) { 200 $data->{$key} = []; 201 } 202 if(defined $name and $name ne ""){ 203 push (@{$data->{$key}},$name); 204 } 205 else{ 206 push (@{$data->{$key}},"GreenstoneXMLPlugout"); 207 } 208 } 209 if ($currentLevel eq "plugout" and $element eq "option") { 210 my $key = $nameMap->{$currentLevel}; 211 if (defined $name and $name ne ""){ 212 push (@{$data->{$key}},$name); 213 } 214 if (defined $value and $value ne ""){ 215 push (@{$data->{$key}},$value); 216 } 217 } 218 219 #@ use hash of hash of strings: hashexp 220 elsif ($element =~ /$hashexp/) { 221 if (!defined $data->{$element}) { 222 $data->{$element} = {}; 223 } 224 if (defined $name and $name =~ /\w/) { 225 if (defined $filter and $filter =~ /\w/) { 226 $data->{$element}->{$name} = $filter; 227 228 } 229 } 230 } 231 232 #@ Handling each classifier/plugin element 233 elsif ($element =~ /$arrayarrayexp/) { 234 # find the gs2 mapping name 235 $currentLevel = $element; 236 my $key = $nameMap->{$element}; 237 238 # define an array of array of strings foreach $k (@{$data->{$key}}) { 239 if (!defined $data->{$key}) { 240 $data->{$key} = []; 241 } 242 # Push classifier/plugin name (e.g. AZList) into $data as the first string 243 push (@{$data->{$key}->[$currentIndex]},$name); 244 #print $currentIndex."indexup\n"; 245 } 246 247 #@ Handling the option elements in each classifier/plugin element (as the following strings) 248 elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") { 249 # find the gs2 mapping name for classifier and plugin 250 my $key = $nameMap->{$currentLevel}; 251 252 if (defined $name and $name =~ /\w/) { 253 push (@{$data->{$key}->[$currentIndex]}, $name); 254 } 255 if (defined $value and $value =~ /\w/) { 256 push (@{$data->{$key}->[$currentIndex]}, $value); 257 } 258 259 } 260 } 261 262 sub EndTag { 263 my ($expat, $element) = @_; 264 my $endTags = q/^(browse|pluginList|displayItemList)$/; 265 if ($element =~ /$endTags/) { 266 $currentIndex = 0; 267 $currentLevel = ""; 268 } 269 # $arrayarrayexp contains classifier|plugin 270 elsif($element =~ /$arrayarrayexp/ ){ 271 $currentIndex = $currentIndex + 1; 272 } 273 } 274 275 sub Text { 276 if (defined $currentLocation) { 277 #@ Handling block metadataList(creator, maintainer, public) 278 if($currentLocation =~ /$stringexp/){ 279 #print $currentLocation; 280 my $key = $nameMap->{$currentLocation}; 281 $data->{$key} = $_; 282 undef $currentLocation; 283 } 284 285 #@ Handling displayItem metadata that are children of displayItemList 286 # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2). 287 elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) { 288 my $lang = $currentAttrRef->{'lang'}; 289 my $name = $currentAttrRef->{'name'}; 290 291 # this is how data->collectionmeta's language is set in Greenstone 2. 292 # Need to be consistent, since export.pl accesses these values all in the same way 293 if(!defined $lang) { 294 $lang = 'default'; 295 } else { 296 $lang = "[l=$lang]"; 297 } 298 299 if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description' 300 # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description' 301 $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed 302 #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n"; 303 } 304 undef $currentLocation; 305 } 306 307 #@ Handling searchtype: plain,form; arrayexp 308 elsif (defined $currentLocation and $currentLocation =~ /searchType/) { 309 # map 'searchType' into 'searchtype' 310 my $key = $nameMap->{$currentLocation}; 311 # split it by ',' 312 my ($plain, $form) = split (",", $_); 313 314 if (!defined $data->{$key}) { 315 $data->{$key} = []; 316 } 317 if (defined $plain and $plain =~ /\w/) { 318 push @{ $data->{$key} }, $plain; 319 } 320 if (defined $form and $form =~ /\w/) { 321 push @{ $data->{$key} }, $form; 322 } 323 } 324 } 325 } 326 327 # This sub is for debugging purposes 328 sub Display { 329 # metadataList 330 foreach my $k (keys %{$data}) { 331 print STDERR "*** metadatalist key $k\n"; 332 } 333 334 print $data->{'creator'}."\n" if (defined $data->{'creator'}); 335 print $data->{"maintainer"}."\n" if (defined $data->{"maintainer"}); 336 print $data->{"public"}."\n" if (defined $data->{"public"}); 337 print $data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"}); 338 print $data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"}); 339 print $data->{"buildtype"}."\n" if (defined $data->{"buildtype"}); 340 print join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"}); 341 print join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'}); 342 print join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'}); 343 print join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'}); 344 print join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'}); 345 print join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'}); 346 print join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'}); 347 348 if (defined $data->{'plugin'}) { 349 foreach $a (@{$data->{'plugin'}}) { 350 print join(",",@$a); 351 print "\n"; 352 } 353 } 354 if (defined $data->{'classify'}) { 355 print "Classifiers: \n"; 356 map { print join(",",@$_)."\n"; } @{$data->{'classify'}}; 357 } 358 359 if (defined $data->{'subcollection'}) { 360 foreach my $key (keys %{$data->{'subcollection'}}) { 361 print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n"; 362 } 363 } 364 } 365 sub Doctype { 366 my ($expat, $name, $sysid, $pubid, $internal) = @_; 367 368 # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files 369 # to be processed as well as the "DirectoryMetadata" files which should now 370 # be created by import.pl 371 die if ($name !~ /^(Greenstone)?DirectoryMetadata$/); 372 } 373 374 # This Char function overrides the one in XML::Parser::Stream to overcome a 375 # problem where $expat->{Text} is treated as the return value, slowing 376 # things down significantly in some cases. 377 sub Char { 378 if ($]<5.008) { 379 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6 380 } 381 $_[0]->{'Text'} .= $_[1]; 382 return undef; 383 } 80 my $indexmap_name = ""; 81 my $haveindexfields = 0; 384 82 385 83 # Reads in the model collection configuration file, collectionConfig.xml, … … 389 87 my ($filename) = @_; 390 88 $data = {}; 391 if ( ($filename !~ /collectionConfig\.xml$/ && $filename !~ /buildConfig\.xml$/)|| !-f $filename) {89 if ($filename !~ /buildConfig\.xml$/ || !-f $filename) { 392 90 return undef; 393 91 } … … 412 110 413 111 if (!open (COLCFG, $filename)) { 414 print STDERR " cfgread::read_cfg_file couldn't read the cfg file $filename\n";112 print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n"; 415 113 } else { 416 114 … … 422 120 return $data; 423 121 } 122 123 sub StartTag { 124 # Those marked with #@ will not be executed at the same time when this sub is being called 125 # so that if/elsif is used to avoid unnecessary tests 126 my ($expat, $element) = @_; 127 128 my $name = $_{'name'}; 129 my $shortname = $_{'shortname'}; 130 131 132 #@ handling block metadataList 133 if (defined $name and $name =~ /$stringexp/){ 134 $currentLocation = $name; 135 # the value will be retrieved later in Text sub 136 } 137 138 #@ handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene) 139 elsif ($element =~ /^indexList$/) { 140 # set up the data arrays 141 # this assumes that the build type has been read already, which is 142 # currently the order we save the file in. 143 if ($data->{'buildtype'} eq "mg") { 144 $indexmap_name = "indexmap"; 145 if (!defined $data->{"indexmap"}) { 146 $data->{"indexmap"} = []; 147 } 148 } 149 else { 150 $indexmap_name = "indexfieldmap"; 151 $haveindexfields = 1; 152 if (!defined $data->{"indexfieldmap"}) { 153 $data->{"indexfieldmap"} = []; 154 } 155 if (!defined $data->{"indexfields"}) { 156 $data->{"indexfields"} = []; 157 } 158 159 } 160 161 } 162 163 elsif ($element =~ /index/) { 164 # store each index in the map 165 if (defined $name && defined $shortname) { 166 push @{$data->{$indexmap_name}}, "$name->$shortname"; 167 if ($haveindexfields) { 168 push @{$data->{'indexfields'}}, $name; 169 } 170 } 171 } 172 173 174 } 175 176 sub EndTag { 177 my ($expat, $element) = @_; 178 } 179 180 sub Text { 181 if (defined $currentLocation) { 182 #@ Handling block metadataList(numDocs, buildType) 183 if($currentLocation =~ /$stringexp/){ 184 #print $currentLocation; 185 my $key = $nameMap->{$currentLocation}; 186 $data->{$key} = $_; 187 undef $currentLocation; 188 } 189 } 190 } 191 192 # This sub is for debugging purposes 193 sub Display { 194 195 print "NumDocs = ".$data->{'numdocs'}."\n" if (defined $data->{'numdocs'}); 196 print "BuildType = ".$data->{'buildtype'}."\n" if (defined $data->{'buildtype'}); 197 print "IndexMap = ". join(" ",@{$data->{'indexmap'}})."\n" if (defined $data->{'indexmap'}); 198 print "IndexFieldMap = ". join(" ",@{$data->{'indexfieldmap'}})."\n" if (defined $data->{'indexfieldmap'}); 199 print "IndexFields = ". join(" ",@{$data->{'indexfields'}})."\n" if (defined $data->{'indexfields'}); 200 201 } 202 203 # is this actually used?? 204 sub Doctype { 205 my ($expat, $name, $sysid, $pubid, $internal) = @_; 206 207 die if ($name !~ /^buildConfig$/); 208 } 209 210 # This Char function overrides the one in XML::Parser::Stream to overcome a 211 # problem where $expat->{Text} is treated as the return value, slowing 212 # things down significantly in some cases. 213 sub Char { 214 if ($]<5.008) { 215 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6 216 } 217 $_[0]->{'Text'} .= $_[1]; 218 return undef; 219 } 220 424 221 425 222
Note:
See TracChangeset
for help on using the changeset viewer.