Changeset 20105


Ignore:
Timestamp:
2009-07-29T14:57:54+12:00 (15 years ago)
Author:
kjdon
Message:

Got rid of all stuff not currently used (left over from collectionConfig parsing)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/buildConfigxml.pm

    r20102 r20105  
    2424###########################################################################
    2525
    26 # reads in configuration files of xml form
     26# reads in buildConfig.xml
     27# Note, only implemented the bits that are currently used, eg by incremental
     28# build code.
     29# The resulting data is not a full representation of buildConfig.xml.
    2730
    2831package buildConfigxml;
     32
    2933use strict;
    3034no strict 'refs';
     
    6064use XML::Parser;
    6165
     66
    6267# A mapping hash to resolve name discrepancy between gs2 and gs3.
    63 my $nameMap = {"key" => "value",
    64            "creator" => "creator",
    65            "maintainer" => "maintainer",
    66            "public" => "public",
    67            "infodb" => "infodbtype",
    68            "defaultIndex" => "defaultindex",
    69            "defaultLevel" => "defaultlevel",
    70            "name" => "collectionname",
    71            "description" => "collectionextra",
    72            "smallicon" => "iconcollectionsmall",
    73            "icon" => "iconcollection",
    74            "level" => "levels",
    75            "classifier" => "classify",
    76            "indexSubcollection" => "indexsubcollections",
    77            "indexLanguage" => "languages",
    78            "defaultIndexLanguage" => "defaultlanguage",
    79            "index" => "indexes",
    80            "plugin" => "plugin",
    81            "plugout" => "plugout",
    82            "indexOption" => "indexoptions",
    83            "searchType" => "searchtype",
    84            "languageMetadata" => "languagemetadata",
     68my $nameMap = {"numDocs" => "numdocs",
    8569           "buildType" => "buildtype"
    8670           };
     71
     72
    8773# A hash structure which is returned by sub read_cfg_file.
    8874my $data = {};
    8975
    90 my $repeatedBlock = q/^(browse|pluginList)$/;
    91 
    9276# use those unique attribute values to locate the text within the elements
    93 # creator, public, maintainer and within a displayItem.
    9477my $currentLocation = "";
    95 my $stringexp = q/^(creator|maintainer|public|buildType)$/;
    96 my $displayItemNames = q/^(name|description)$/;
     78my $stringexp = q/^(buildType|numDocs)$/;
    9779 
    98 # For storing the attributes during the StartTag subroutine, so that
    99 # we can use it later in Text (or EndTag) subroutines
    100 my $currentAttrRef = undef;
    101 
    102 my $currentLevel = "";
    103 
    104 # Count the elements with same name within the same block
    105 # ("plugin", "option")
    106 my $currentIndex = 0;
    107 my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
    108 my $arrayarrayexp= q/^(plugin|classifier)$/;
    109 my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
    110 my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
    111 
    112 my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
    113 
    114 sub StartTag {
    115 # Those marked with #@ will not be executed at the same time when this sub is being called
    116 # so that if/elsif is used to avoid unnecessary tests
    117     my ($expat, $element) = @_;
    118    
    119     # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
    120     # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
    121     # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
    122     $currentAttrRef = \%_;
    123 
    124     my $name = $_{'name'};
    125     my $value = $_{'value'};
    126     my $type = $_{'type'};
    127 
    128     # for subcollections
    129     my $filter = $_{'filter'};
    130    
    131     #@ Marking repeated block
    132     if ($element =~ /$repeatedBlock/) {
    133     $currentIndex = 0;
    134     }
    135 
    136     #@ handling block metadataList
    137     elsif (defined $name and $name =~ /$stringexp/){
    138       $currentLocation = $name;
    139     }
    140     #@ handling default search index/level/indexLanguage and languageMetadata
    141     elsif ($element =~ /$defaults/) {
    142       if (defined $name and $name =~ /\w/) {
    143     $data->{$nameMap->{$element}} = $name;
    144       }
    145     }
    146 
    147     #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
    148     elsif($element eq "displayItemList") {
    149     $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
    150     }
    151     elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
    152     if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
    153         # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
    154         $currentLocation = $name;
    155     }
    156     }
    157 
    158     #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
    159     elsif ($element eq "infodb") {
    160       $data->{'infodbtype'} = $type;
    161     }
    162    
    163     #@ Handling indexer: mgpp/mg/lucene; stringexp
    164     elsif ($element eq "search") {
    165       $data->{'buildtype'} = $type;
    166     }
    167 
    168     #@ Handling searchtype: plain,form; arrayexp
    169     #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
    170     #@ Handling searchtype: plain, form
    171     #$currentLocation = $name; 
    172     #}
    173  
    174     #@ Handle index|level|indexSubcollection|indexLanguage
    175     elsif ($element =~ /$arrayexp/) {
    176       my $key = $nameMap->{$element};   
    177       if (!defined $data->{$key}) {
    178     $data->{$key} = [];
    179       }
    180 
    181       push (@{$data->{$key}},$name);
    182     }
    183 
    184     #@ indexoptions: accentfold/casefold/stem; arrayexp
    185     elsif ($element eq "indexOption") {
    186       $currentLevel = "indexOption";
    187     }
    188     if ($currentLevel eq "indexOption" and $element eq "option") {
    189       my $key = $nameMap->{$currentLevel}; 
    190       if (!defined $data->{$key}) {
    191     $data->{$key} = [];
    192       }
    193       push (@{$data->{$key}},$name);
    194     }
    195     #@ plugout options
    196     elsif ($element eq "plugout") {
    197     $currentLevel = "plugout";
    198     my $key = $nameMap->{$currentLevel};   
    199     if (!defined $data->{$key}) {
    200         $data->{$key} = [];
    201     }
    202     if(defined $name and $name ne ""){
    203         push (@{$data->{$key}},$name);
    204     }
    205     else{
    206        push (@{$data->{$key}},"GreenstoneXMLPlugout");
    207     }
    208     }
    209     if ($currentLevel eq "plugout" and $element eq "option") {     
    210     my $key = $nameMap->{$currentLevel};
    211     if (defined $name and $name ne ""){
    212         push (@{$data->{$key}},$name);
    213     }
    214     if (defined $value and $value ne  ""){
    215         push (@{$data->{$key}},$value);
    216     }
    217     }
    218 
    219     #@ use hash of hash of strings: hashexp
    220     elsif ($element =~ /$hashexp/) {
    221       if (!defined $data->{$element}) {
    222     $data->{$element} = {};
    223       }
    224       if (defined $name and $name =~ /\w/) {
    225     if (defined $filter and $filter =~ /\w/) {
    226       $data->{$element}->{$name} = $filter;
    227 
    228     }
    229       }
    230     }
    231 
    232     #@ Handling each classifier/plugin element
    233     elsif ($element =~ /$arrayarrayexp/) {
    234     # find the gs2 mapping name
    235         $currentLevel = $element;
    236         my $key = $nameMap->{$element};
    237 
    238     # define an array of array of strings   foreach $k (@{$data->{$key}}) {
    239     if (!defined $data->{$key}) {
    240       $data->{$key} = [];
    241     }
    242     # Push classifier/plugin name (e.g. AZList) into $data as the first string
    243     push (@{$data->{$key}->[$currentIndex]},$name);
    244     #print $currentIndex."indexup\n";
    245       }
    246    
    247     #@ Handling the option elements in each classifier/plugin element (as the following strings)
    248     elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
    249     # find the gs2 mapping name for classifier and plugin
    250         my $key = $nameMap->{$currentLevel};   
    251 
    252     if (defined $name and $name =~ /\w/) {
    253         push (@{$data->{$key}->[$currentIndex]}, $name);
    254     }
    255     if (defined $value and $value =~ /\w/) {
    256             push (@{$data->{$key}->[$currentIndex]}, $value);
    257     }
    258 
    259     }
    260 }
    261 
    262 sub EndTag {
    263     my ($expat, $element) = @_;
    264     my $endTags = q/^(browse|pluginList|displayItemList)$/;   
    265     if ($element =~ /$endTags/) {
    266         $currentIndex = 0;
    267         $currentLevel = "";
    268     }
    269     # $arrayarrayexp contains classifier|plugin
    270     elsif($element =~ /$arrayarrayexp/ ){
    271         $currentIndex = $currentIndex + 1;
    272     }
    273 }
    274 
    275 sub Text {
    276     if (defined $currentLocation) {
    277     #@ Handling block metadataList(creator, maintainer, public)
    278     if($currentLocation =~ /$stringexp/){
    279         #print $currentLocation;
    280         my $key = $nameMap->{$currentLocation};
    281         $data->{$key} = $_;
    282         undef $currentLocation;
    283     }
    284    
    285     #@ Handling displayItem metadata that are children of displayItemList
    286     # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
    287     elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
    288         my $lang = $currentAttrRef->{'lang'};
    289         my $name = $currentAttrRef->{'name'};
    290        
    291         # this is how data->collectionmeta's language is set in Greenstone 2.
    292         # Need to be consistent, since export.pl accesses these values all in the same way
    293         if(!defined $lang) {
    294         $lang = 'default';
    295         } else {
    296         $lang = "[l=$lang]";
    297         }
    298        
    299         if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
    300         # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
    301         $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
    302         #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
    303         }
    304         undef $currentLocation;
    305     }
    306  
    307     #@ Handling searchtype: plain,form; arrayexp
    308     elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
    309         # map 'searchType' into 'searchtype'
    310         my $key = $nameMap->{$currentLocation};
    311         # split it by ','
    312         my ($plain, $form) = split (",", $_);
    313        
    314         if (!defined $data->{$key}) {
    315         $data->{$key} = [];
    316         }
    317         if (defined $plain and $plain =~ /\w/) {
    318         push @{ $data->{$key} }, $plain;
    319         }
    320         if (defined $form and $form =~ /\w/) {
    321         push @{ $data->{$key} }, $form;
    322         }
    323     }
    324     }   
    325 }
    326 
    327 # This sub is for debugging purposes
    328 sub Display {
    329     # metadataList
    330     foreach my $k (keys %{$data}) {
    331     print STDERR "*** metadatalist key $k\n";
    332     }
    333  
    334     print $data->{'creator'}."\n" if (defined $data->{'creator'});
    335     print $data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
    336     print $data->{"public"}."\n" if (defined $data->{"public"});
    337     print $data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
    338     print $data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
    339     print $data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
    340     print  join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
    341     print  join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
    342     print  join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
    343     print  join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
    344     print  join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
    345     print  join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
    346     print  join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
    347  
    348     if (defined $data->{'plugin'}) {
    349     foreach $a (@{$data->{'plugin'}}) {
    350         print join(",",@$a);
    351         print "\n";
    352     }
    353     }
    354     if (defined $data->{'classify'}) {
    355     print "Classifiers: \n";
    356     map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
    357     }
    358    
    359     if (defined $data->{'subcollection'}) {
    360     foreach my $key (keys %{$data->{'subcollection'}}) {
    361         print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
    362     }
    363     }
    364 }
    365 sub Doctype {
    366     my ($expat, $name, $sysid, $pubid, $internal) = @_;
    367 
    368     # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files
    369     # to be processed as well as the "DirectoryMetadata" files which should now
    370     # be created by import.pl
    371     die if ($name !~ /^(Greenstone)?DirectoryMetadata$/);
    372 }
    373 
    374 # This Char function overrides the one in XML::Parser::Stream to overcome a
    375 # problem where $expat->{Text} is treated as the return value, slowing
    376 # things down significantly in some cases.
    377 sub Char {
    378     if ($]<5.008) {
    379     use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
    380     }
    381     $_[0]->{'Text'} .= $_[1];
    382     return undef;
    383 }
     80my $indexmap_name = "";
     81my $haveindexfields = 0;
    38482
    38583# Reads in the model collection configuration file, collectionConfig.xml,
     
    38987    my ($filename) = @_;
    39088    $data = {};
    391     if (($filename !~ /collectionConfig\.xml$/ && $filename !~ /buildConfig\.xml$/) || !-f $filename) {
     89    if ($filename !~ /buildConfig\.xml$/ || !-f $filename) {
    39290        return undef;
    39391    }
     
    412110
    413111    if (!open (COLCFG, $filename)) {
    414     print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
     112    print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
    415113    } else {
    416114
     
    422120    return $data;
    423121}
     122
     123sub StartTag {
     124# Those marked with #@ will not be executed at the same time when this sub is being called
     125# so that if/elsif is used to avoid unnecessary tests
     126    my ($expat, $element) = @_;
     127   
     128    my $name = $_{'name'};
     129    my $shortname = $_{'shortname'};
     130
     131   
     132    #@ handling block metadataList
     133    if (defined $name and $name =~ /$stringexp/){
     134      $currentLocation = $name;
     135      # the value will be retrieved later in Text sub
     136    }
     137
     138    #@ handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene)
     139    elsif ($element =~ /^indexList$/) {
     140    # set up the data arrays
     141    # this assumes that the build type has been read already, which is
     142    # currently the order we save the file in.
     143    if ($data->{'buildtype'} eq "mg") {
     144        $indexmap_name = "indexmap";
     145        if (!defined $data->{"indexmap"}) {
     146        $data->{"indexmap"} = [];
     147        }
     148    }
     149    else {
     150        $indexmap_name = "indexfieldmap";
     151        $haveindexfields = 1;
     152        if (!defined $data->{"indexfieldmap"}) {
     153        $data->{"indexfieldmap"} = [];
     154        }
     155        if (!defined $data->{"indexfields"}) {
     156        $data->{"indexfields"} = [];
     157        }
     158
     159    }
     160   
     161    }
     162   
     163    elsif ($element =~ /index/) {
     164    # store each index in the map
     165    if (defined $name && defined $shortname) {
     166        push @{$data->{$indexmap_name}}, "$name->$shortname";
     167        if ($haveindexfields) {
     168        push @{$data->{'indexfields'}}, $name;
     169        }
     170    }
     171    }
     172
     173
     174}
     175
     # End-tag handler. Currently a no-op: it unpacks the parser object and the
     # element name and does nothing further with them.
     176sub EndTag {
     177    my ($expat, $element) = @_;
     178}
     179
     180sub Text {
     181    if (defined $currentLocation) {
     182    #@ Handling block metadataList(numDocs, buildType)
     183    if($currentLocation =~ /$stringexp/){
     184        #print $currentLocation;
     185        my $key = $nameMap->{$currentLocation};
     186        $data->{$key} = $_;
     187        undef $currentLocation;
     188    }   
     189    }   
     190}
     191
     192# This sub is for debugging purposes
     193sub Display {
     194
     195    print "NumDocs = ".$data->{'numdocs'}."\n" if (defined $data->{'numdocs'});
     196    print "BuildType = ".$data->{'buildtype'}."\n" if (defined $data->{'buildtype'});
     197    print  "IndexMap = ". join(" ",@{$data->{'indexmap'}})."\n" if (defined $data->{'indexmap'});
     198    print  "IndexFieldMap = ". join(" ",@{$data->{'indexfieldmap'}})."\n" if (defined $data->{'indexfieldmap'});
     199    print  "IndexFields = ". join(" ",@{$data->{'indexfields'}})."\n" if (defined $data->{'indexfields'});
     200
     201}
     202
     203# is this actually used??
     204sub Doctype {
     205    my ($expat, $name, $sysid, $pubid, $internal) = @_;
     206
     207    die if ($name !~ /^buildConfig$/);
     208}
     209
     210# This Char function overrides the one in XML::Parser::Stream to overcome a
     211# problem where $expat->{Text} is treated as the return value, slowing
     212# things down significantly in some cases.
     # Appends the second argument to the 'Text' entry of the first argument
     # (the $expat object mentioned above) and explicitly returns undef.
     213sub Char {
     # NOTE(review): `use bytes` is a lexically scoped pragma, so it presumably
     # covers only this otherwise empty block, not the concatenation below, and
     # is not gated by the runtime version test - confirm.
     214    if ($]<5.008) {
     215    use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
     216    }
     217    $_[0]->{'Text'} .= $_[1];
     218    return undef;
     219}
     220
    424221
    425222
Note: See TracChangeset for help on using the changeset viewer.