Changeset 20105

Show
Ignore:
Timestamp:
29.07.2009 14:57:54 (10 years ago)
Author:
kjdon
Message:

got rid of all stuff not currently used (left over from collectionConfig parsing)

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/buildConfigxml.pm

    r20102 r20105  
    2424########################################################################### 
    2525 
    26 # reads in configuration files of xml form 
     26# reads in buildConfig.xml  
     27# Note, only implemented the bits that are currently used, eg by incremental  
     28# build code. 
     29# The resulting data is not a full representation of buildConfig.xml. 
    2730 
    2831package buildConfigxml; 
     32 
    2933use strict; 
    3034no strict 'refs'; 
     
    6064use XML::Parser; 
    6165 
     66 
    6267# A mapping hash to resolve name discrepancy between gs2 and gs3. 
    63 my $nameMap = {"key" => "value", 
    64            "creator" => "creator", 
    65            "maintainer" => "maintainer", 
    66            "public" => "public", 
    67            "infodb" => "infodbtype", 
    68            "defaultIndex" => "defaultindex", 
    69            "defaultLevel" => "defaultlevel", 
    70            "name" => "collectionname", 
    71            "description" => "collectionextra", 
    72            "smallicon" => "iconcollectionsmall", 
    73            "icon" => "iconcollection", 
    74            "level" => "levels", 
    75            "classifier" => "classify", 
    76            "indexSubcollection" => "indexsubcollections", 
    77            "indexLanguage" => "languages", 
    78            "defaultIndexLanguage" => "defaultlanguage", 
    79            "index" => "indexes", 
    80            "plugin" => "plugin", 
    81            "plugout" => "plugout", 
    82            "indexOption" => "indexoptions", 
    83            "searchType" => "searchtype", 
    84            "languageMetadata" => "languagemetadata", 
     68my $nameMap = {"numDocs" => "numdocs", 
    8569           "buildType" => "buildtype" 
    8670           }; 
     71 
     72 
    8773# A hash structure which is returned by sub read_cfg_file. 
    8874my $data = {}; 
    8975 
    90 my $repeatedBlock = q/^(browse|pluginList)$/; 
    91  
    9276# use those unique attribute values to locate the text within the elements 
    93 # creator, public, maintainer and within a displayItem. 
    9477my $currentLocation = ""; 
    95 my $stringexp = q/^(creator|maintainer|public|buildType)$/; 
    96 my $displayItemNames = q/^(name|description)$/; 
     78my $stringexp = q/^(buildType|numDocs)$/; 
    9779   
    98 # For storing the attributes during the StartTag subroutine, so that  
    99 # we can use it later in Text (or EndTag) subroutines 
    100 my $currentAttrRef = undef;  
    101  
    102 my $currentLevel = ""; 
    103  
    104 # Count the elements with same name within the same block 
    105 # ("plugin", "option") 
    106 my $currentIndex = 0; 
    107 my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/; 
    108 my $arrayarrayexp= q/^(plugin|classifier)$/; 
    109 my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here 
    110 my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here. 
    111  
    112 my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/; 
    113  
    114 sub StartTag { 
    115 # Those marked with #@ will not be executed at the same time when this sub is being called 
    116 # so that if/elsif is used to avoid unnecessary tests 
    117     my ($expat, $element) = @_; 
    118      
    119     # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream 
    120     # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes 
    121     # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).  
    122     $currentAttrRef = \%_;  
    123  
    124     my $name = $_{'name'}; 
    125     my $value = $_{'value'}; 
    126     my $type = $_{'type'}; 
    127  
    128     # for subcollections 
    129     my $filter = $_{'filter'}; 
    130      
    131     #@ Marking repeated block 
    132     if ($element =~ /$repeatedBlock/) { 
    133     $currentIndex = 0; 
    134     } 
    135  
    136     #@ handling block metadataList 
    137     elsif (defined $name and $name =~ /$stringexp/){ 
    138       $currentLocation = $name; 
    139     } 
    140     #@ handling default search index/level/indexLanguage and languageMetadata 
    141     elsif ($element =~ /$defaults/) { 
    142       if (defined $name and $name =~ /\w/) { 
    143     $data->{$nameMap->{$element}} = $name; 
    144       } 
    145     } 
    146  
    147     #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2) 
    148     elsif($element eq "displayItemList") { 
    149     $currentLevel = "displayItemList"; # storing the parent if it is displayItemList 
    150     }  
    151     elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements 
    152     if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) { 
    153         # either when there is no "assigned" attribute, or when assigned=true (for displayItems): 
    154         $currentLocation = $name; 
    155     } 
    156     } 
    157  
    158     #@ Handling database type: gdbm or gdbm-txtgz, later jdbm. 
    159     elsif ($element eq "infodb") { 
    160       $data->{'infodbtype'} = $type; 
    161     } 
    162      
    163     #@ Handling indexer: mgpp/mg/lucene; stringexp 
    164     elsif ($element eq "search") { 
    165       $data->{'buildtype'} = $type; 
    166     } 
    167  
    168     #@ Handling searchtype: plain,form; arrayexp 
    169     #elsif ($element eq "format" and defined $name and $name =~ /searchType/) { 
    170     #@ Handling searchtype: plain, form 
    171     #$currentLocation = $name;   
    172     #} 
    173   
    174     #@ Handle index|level|indexSubcollection|indexLanguage 
    175     elsif ($element =~ /$arrayexp/) { 
    176       my $key = $nameMap->{$element};    
    177       if (!defined $data->{$key}) { 
    178     $data->{$key} = []; 
    179       } 
    180  
    181       push (@{$data->{$key}},$name); 
    182     } 
    183  
    184     #@ indexoptions: accentfold/casefold/stem; arrayexp 
    185     elsif ($element eq "indexOption") { 
    186       $currentLevel = "indexOption"; 
    187     } 
    188     if ($currentLevel eq "indexOption" and $element eq "option") { 
    189       my $key = $nameMap->{$currentLevel};   
    190       if (!defined $data->{$key}) { 
    191     $data->{$key} = []; 
    192       } 
    193       push (@{$data->{$key}},$name); 
    194     } 
    195     #@ plugout options 
    196     elsif ($element eq "plugout") { 
    197     $currentLevel = "plugout"; 
    198     my $key = $nameMap->{$currentLevel};     
    199     if (!defined $data->{$key}) { 
    200         $data->{$key} = []; 
    201     } 
    202     if(defined $name and $name ne ""){ 
    203         push (@{$data->{$key}},$name); 
    204     } 
    205     else{ 
    206        push (@{$data->{$key}},"GreenstoneXMLPlugout");  
    207     } 
    208     } 
    209     if ($currentLevel eq "plugout" and $element eq "option") {      
    210     my $key = $nameMap->{$currentLevel}; 
    211     if (defined $name and $name ne ""){ 
    212         push (@{$data->{$key}},$name); 
    213     } 
    214     if (defined $value and $value ne  ""){ 
    215         push (@{$data->{$key}},$value); 
    216     } 
    217     } 
    218  
    219     #@ use hash of hash of strings: hashexp 
    220     elsif ($element =~ /$hashexp/) { 
    221       if (!defined $data->{$element}) { 
    222     $data->{$element} = {}; 
    223       } 
    224       if (defined $name and $name =~ /\w/) { 
    225     if (defined $filter and $filter =~ /\w/) { 
    226       $data->{$element}->{$name} = $filter; 
    227  
    228     } 
    229       } 
    230     } 
    231  
    232     #@ Handling each classifier/plugin element 
    233     elsif ($element =~ /$arrayarrayexp/) { 
    234     # find the gs2 mapping name 
    235         $currentLevel = $element; 
    236         my $key = $nameMap->{$element};  
    237  
    238     # define an array of array of strings   foreach $k (@{$data->{$key}}) { 
    239     if (!defined $data->{$key}) { 
    240       $data->{$key} = []; 
    241     } 
    242     # Push classifier/plugin name (e.g. AZList) into $data as the first string 
    243     push (@{$data->{$key}->[$currentIndex]},$name); 
    244     #print $currentIndex."indexup\n"; 
    245       } 
    246      
    247     #@ Handling the option elements in each classifier/plugin element (as the following strings) 
    248     elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") { 
    249     # find the gs2 mapping name for classifier and plugin 
    250         my $key = $nameMap->{$currentLevel};     
    251  
    252     if (defined $name and $name =~ /\w/) { 
    253         push (@{$data->{$key}->[$currentIndex]}, $name); 
    254     } 
    255     if (defined $value and $value =~ /\w/) { 
    256             push (@{$data->{$key}->[$currentIndex]}, $value); 
    257     } 
    258  
    259     } 
    260 } 
    261  
    262 sub EndTag { 
    263     my ($expat, $element) = @_; 
    264     my $endTags = q/^(browse|pluginList|displayItemList)$/;    
    265     if ($element =~ /$endTags/) { 
    266         $currentIndex = 0; 
    267         $currentLevel = ""; 
    268     } 
    269     # $arrayarrayexp contains classifier|plugin 
    270     elsif($element =~ /$arrayarrayexp/ ){ 
    271         $currentIndex = $currentIndex + 1; 
    272     } 
    273 } 
    274  
    275 sub Text { 
    276     if (defined $currentLocation) {  
    277     #@ Handling block metadataList(creator, maintainer, public) 
    278     if($currentLocation =~ /$stringexp/){ 
    279         #print $currentLocation; 
    280         my $key = $nameMap->{$currentLocation};  
    281         $data->{$key} = $_; 
    282         undef $currentLocation; 
    283     } 
    284      
    285     #@ Handling displayItem metadata that are children of displayItemList 
    286     # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2). 
    287     elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) { 
    288         my $lang = $currentAttrRef->{'lang'}; 
    289         my $name = $currentAttrRef->{'name'}; 
    290          
    291         # this is how data->collectionmeta's language is set in Greenstone 2.  
    292         # Need to be consistent, since export.pl accesses these values all in the same way 
    293         if(!defined $lang) { 
    294         $lang = 'default'; 
    295         } else { 
    296         $lang = "[l=$lang]";  
    297         } 
    298          
    299         if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description' 
    300         # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description' 
    301         $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed 
    302         #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n"; 
    303         } 
    304         undef $currentLocation; 
    305     } 
    306    
    307     #@ Handling searchtype: plain,form; arrayexp 
    308     elsif (defined $currentLocation and $currentLocation =~ /searchType/) { 
    309         # map 'searchType' into 'searchtype' 
    310         my $key = $nameMap->{$currentLocation}; 
    311         # split it by ',' 
    312         my ($plain, $form) = split (",", $_); 
    313          
    314         if (!defined $data->{$key}) { 
    315         $data->{$key} = []; 
    316         } 
    317         if (defined $plain and $plain =~ /\w/) { 
    318         push @{ $data->{$key} }, $plain; 
    319         } 
    320         if (defined $form and $form =~ /\w/) { 
    321         push @{ $data->{$key} }, $form; 
    322         } 
    323     } 
    324     }    
    325 } 
    326  
    327 # This sub is for debugging purposes 
    328 sub Display { 
    329     # metadataList 
    330     foreach my $k (keys %{$data}) { 
    331     print STDERR "*** metadatalist key $k\n";  
    332     } 
    333    
    334     print $data->{'creator'}."\n" if (defined $data->{'creator'}); 
    335     print $data->{"maintainer"}."\n" if (defined $data->{"maintainer"}); 
    336     print $data->{"public"}."\n" if (defined $data->{"public"}); 
    337     print $data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"}); 
    338     print $data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"}); 
    339     print $data->{"buildtype"}."\n" if (defined $data->{"buildtype"}); 
    340     print  join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"}); 
    341     print  join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'}); 
    342     print  join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'}); 
    343     print  join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'}); 
    344     print  join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'}); 
    345     print  join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'}); 
    346     print  join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'}); 
    347   
    348     if (defined $data->{'plugin'}) { 
    349     foreach $a (@{$data->{'plugin'}}) { 
    350         print join(",",@$a); 
    351         print "\n"; 
    352     } 
    353     } 
    354     if (defined $data->{'classify'}) { 
    355     print "Classifiers: \n"; 
    356     map { print join(",",@$_)."\n"; } @{$data->{'classify'}}; 
    357     } 
    358      
    359     if (defined $data->{'subcollection'}) { 
    360     foreach my $key (keys %{$data->{'subcollection'}}) { 
    361         print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n"; 
    362     } 
    363     } 
    364 } 
    365 sub Doctype { 
    366     my ($expat, $name, $sysid, $pubid, $internal) = @_; 
    367  
    368     # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files  
    369     # to be processed as well as the "DirectoryMetadata" files which should now 
    370     # be created by import.pl 
    371     die if ($name !~ /^(Greenstone)?DirectoryMetadata$/); 
    372 } 
    373  
    374 # This Char function overrides the one in XML::Parser::Stream to overcome a 
    375 # problem where $expat->{Text} is treated as the return value, slowing 
    376 # things down significantly in some cases. 
    377 sub Char { 
    378     if ($]<5.008) { 
    379     use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6 
    380     } 
    381     $_[0]->{'Text'} .= $_[1]; 
    382     return undef; 
    383 } 
     80my $indexmap_name = ""; 
     81my $haveindexfields = 0; 
    38482 
    38583# Reads in the model collection configuration file, collectionConfig.xml, 
     
    38987    my ($filename) = @_; 
    39088    $data = {}; 
    391     if (($filename !~ /collectionConfig\.xml$/ && $filename !~ /buildConfig\.xml$/) || !-f $filename) { 
     89    if ($filename !~ /buildConfig\.xml$/ || !-f $filename) { 
    39290        return undef; 
    39391    } 
     
    412110 
    413111    if (!open (COLCFG, $filename)) { 
    414     print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n"; 
     112    print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n"; 
    415113    } else { 
    416114 
     
    422120    return $data; 
    423121} 
     122 
     123sub StartTag { 
     124# Those marked with #@ will not be executed at the same time when this sub is being called 
     125# so that if/elsif is used to avoid unnecessary tests 
     126    my ($expat, $element) = @_; 
     127     
     128    my $name = $_{'name'}; 
     129    my $shortname = $_{'shortname'}; 
     130 
     131     
     132    #@ handling block metadataList 
     133    if (defined $name and $name =~ /$stringexp/){ 
     134      $currentLocation = $name; 
     135      # the value will be retrieved later in Text sub 
     136    } 
     137 
     138    #@ handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene) 
     139    elsif ($element =~ /^indexList$/) { 
     140    # set up the data arrays 
     141    # this assumes that the build type has been read already, which is 
     142    # currently the order we save the file in. 
     143    if ($data->{'buildtype'} eq "mg") { 
     144        $indexmap_name = "indexmap"; 
     145        if (!defined $data->{"indexmap"}) { 
     146        $data->{"indexmap"} = []; 
     147        } 
     148    } 
     149    else { 
     150        $indexmap_name = "indexfieldmap"; 
     151        $haveindexfields = 1; 
     152        if (!defined $data->{"indexfieldmap"}) { 
     153        $data->{"indexfieldmap"} = []; 
     154        } 
     155        if (!defined $data->{"indexfields"}) { 
     156        $data->{"indexfields"} = []; 
     157        } 
     158 
     159    } 
     160     
     161    } 
     162     
     163    elsif ($element =~ /index/) { 
     164    # store each index in the map 
     165    if (defined $name && defined $shortname) { 
     166        push @{$data->{$indexmap_name}}, "$name->$shortname"; 
     167        if ($haveindexfields) { 
     168        push @{$data->{'indexfields'}}, $name; 
     169        } 
     170    } 
     171    } 
     172 
     173 
     174} 
     175 
     176sub EndTag { 
     177    my ($expat, $element) = @_; 
     178} 
     179 
     180sub Text { 
     181    if (defined $currentLocation) {  
     182    #@ Handling block metadataList(numDocs, buildType) 
     183    if($currentLocation =~ /$stringexp/){ 
     184        #print $currentLocation; 
     185        my $key = $nameMap->{$currentLocation};  
     186        $data->{$key} = $_; 
     187        undef $currentLocation; 
     188    }    
     189    }    
     190} 
     191 
     192# This sub is for debugging purposes 
     193sub Display { 
     194 
     195    print "NumDocs = ".$data->{'numdocs'}."\n" if (defined $data->{'numdocs'}); 
     196    print "BuildType = ".$data->{'buildtype'}."\n" if (defined $data->{'buildtype'}); 
     197    print  "IndexMap = ". join(" ",@{$data->{'indexmap'}})."\n" if (defined $data->{'indexmap'}); 
     198    print  "IndexFieldMap = ". join(" ",@{$data->{'indexfieldmap'}})."\n" if (defined $data->{'indexfieldmap'}); 
     199    print  "IndexFields = ". join(" ",@{$data->{'indexfields'}})."\n" if (defined $data->{'indexfields'}); 
     200 
     201} 
     202 
     203# is this actually used?? 
     204sub Doctype { 
     205    my ($expat, $name, $sysid, $pubid, $internal) = @_; 
     206 
     207    die if ($name !~ /^buildConfig$/); 
     208} 
     209 
     210# This Char function overrides the one in XML::Parser::Stream to overcome a 
     211# problem where $expat->{Text} is treated as the return value, slowing 
     212# things down significantly in some cases. 
     213sub Char { 
     214    if ($]<5.008) { 
     215    use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6 
     216    } 
     217    $_[0]->{'Text'} .= $_[1]; 
     218    return undef; 
     219} 
     220 
    424221 
    425222