root/gsdl/trunk/perllib/buildConfigxml.pm @ 20105

Revision 20105, 17.8 KB (checked in by kjdon, 11 years ago)

got rid of all stuff not currently used (left over from collectionConfig parsing)

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in buildConfig.xml
27# Note, only implemented the bits that are currently used, eg by incremental
28# build code.
29# The resulting data is not a full representation of buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
# Makes a bundled copy of XML::Parser findable before "use XML::Parser" is
# compiled.  The bundled directory is chosen from the running Perl version
# (perl-5.6 for anything older than 5.8, perl-5.8 otherwise) and is only
# added when GSDLOS is not "windows".
sub BEGIN {
    # Note: $] encodes the version number of perl
    my $perl_dir = ($] < 5.008) ? "perl-5.6" : "perl-5.8";

    if ($] == 5.008) {
        # exactly perl 5.8.0
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR "         Please upgrade to a newer version of Perl.\n";
    }

    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        # Appended (not prepended) to @INC so that an XML::Parser already
        # installed on the system is used by default.
        push(@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
    }
}
63
64use XML::Parser;
65
66
# Maps the metadata names used in buildConfig.xml (gs3) to the key names
# used in the returned hash (gs2).
my $nameMap = {
    "numDocs"   => "numdocs",
    "buildType" => "buildtype",
};

# The hash structure that sub read_cfg_file fills in and returns.
my $data = {};

# Name attribute of the metadata element whose text has not been stored
# yet.  StartTag sets it (only for names matching $stringexp) and Text
# clears it once the value has been stored.
my $currentLocation = "";
my $stringexp = '^(buildType|numDocs)$';

# Key of $data that receives the "name->shortname" entries of the index
# list: "indexmap" or "indexfieldmap" (chosen in StartTag).
my $indexmap_name = "";
# Set in StartTag when the index names must also be collected in
# $data->{'indexfields'}.
my $haveindexfields = 0;
82
# Reads a build configuration file (buildConfig.xml) into a hash whose keys
# follow the gs2 naming: 'numdocs', 'buildtype', and 'indexmap' (mg) or
# 'indexfieldmap' plus 'indexfields' (other build types).
#
# Argument: $filename - path of the file; must end in "buildConfig.xml".
# Returns:  undef if the name does not match or the file does not exist,
#           otherwise a reference to the data hash (left empty if the file
#           could not be opened for reading).
# Parse errors raised by XML::Parser are not caught here.
sub read_cfg_file {
    my ($filename) = @_;

    # Start every parse from a clean slate.  The handlers communicate
    # through these file-level variables, so values left over from an
    # earlier call must not leak into this one.
    $data = {};
    $currentLocation = "";
    $indexmap_name = "";
    $haveindexfields = 0;

    if ($filename !~ /buildConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # The Stream style looks up StartTag/EndTag/Text in this package; Char
    # and Doctype are installed explicitly.
    my %parser_args = ('Style' => 'Stream',
                       'Handlers' => {'Char' => \&Char,
                                      'Doctype' => \&Doctype
                                      });
    if ($] >= 5.008) {
        # Perl 5.8 and later
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # Probe readability first so that an unreadable file yields a message
    # and an empty result instead of an exception from the parser.
    my $probe;
    if (!open($probe, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
    }
    else {
        close($probe);
        $parser->parsefile($filename);
    }

    #&Display;
    return $data;
}
122
# Start-tag callback for the XML::Parser Stream style.
# Arguments: $expat - the parser instance, $element - the element name.
# The element's attributes are read from %_.
#
# - An element whose name attribute matches $stringexp (numDocs, buildType)
#   is remembered in $currentLocation so that Text can store its content.
# - <indexList> selects which map of $data the following indexes go into.
# - <index> adds a "name->shortname" entry to that map.
# The three cases are mutually exclusive, hence the if/elsif chain.
sub StartTag {
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    # handling block metadataList
    if (defined $name and $name =~ /$stringexp/) {
        # the value will be retrieved later in Text sub
        $currentLocation = $name;
    }

    # handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene)
    elsif ($element eq 'indexList') {
        # set up the data arrays
        # this assumes that the build type has been read already, which is
        # currently the order we save the file in.
        my $buildtype = $data->{'buildtype'};
        if (defined $buildtype && $buildtype eq "mg") {
            $indexmap_name = "indexmap";
            # make sure a previous non-mg index list cannot leave this set
            $haveindexfields = 0;
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }

    # Exact match only: indexLanguage and indexSubcollection elements also
    # carry name/shortname attributes and must not end up in the index map.
    elsif ($element eq 'index') {
        # store each index in the map
        if (defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
175
# End-tag callback for the XML::Parser Stream style.
# Arguments: $expat - the parser instance, $element - the element name.
sub EndTag {
    my ($expat, $element) = @_;

    # The Stream style only calls Text when an element has character data,
    # and it does so before calling EndTag.  If a location is still pending
    # here, the element was empty (e.g. <metadata name="numDocs"></metadata>);
    # drop it so that the next run of text (usually the whitespace between
    # elements) is not stored as that element's value.
    undef $currentLocation;
}
179
# Text callback for the XML::Parser Stream style; the character data is
# read from $_.  When StartTag has flagged the enclosing element
# (numDocs or buildType), the text is stored in $data under the gs2 key
# name taken from $nameMap and the pending location is cleared.
sub Text {
    return unless defined $currentLocation;

    # Handling block metadataList(numDocs, buildType)
    return unless $currentLocation =~ /$stringexp/;

    #print $currentLocation;
    my $gs2_key = $nameMap->{$currentLocation};
    $data->{$gs2_key} = $_;
    undef $currentLocation;
}
191
# Debugging aid: prints every entry of $data that is currently defined,
# one "Label = value" line each, to the selected output handle.
sub Display {

    # plain values
    foreach my $entry (["NumDocs", 'numdocs'], ["BuildType", 'buildtype']) {
        my ($label, $key) = @$entry;
        next unless defined $data->{$key};
        print "$label = $data->{$key}\n";
    }

    # list values, printed space-separated
    foreach my $entry (["IndexMap", 'indexmap'],
                       ["IndexFieldMap", 'indexfieldmap'],
                       ["IndexFields", 'indexfields']) {
        my ($label, $key) = @$entry;
        next unless defined $data->{$key};
        print "$label = " . join(" ", @{$data->{$key}}) . "\n";
    }

}
202
# Doctype handler (installed through the 'Handlers' option in
# read_cfg_file).  Aborts the parse with die when the name given in the
# document type declaration is not "buildConfig".
sub Doctype {
    my ($expat, $doctype_name, $sysid, $pubid, $internal) = @_;

    die unless ($doctype_name =~ /^buildConfig$/);
}
209
# Character-data handler that replaces the one in XML::Parser::Stream.
# It appends the new chunk to the parser's 'Text' buffer and explicitly
# returns undef, which avoids a problem where $expat->{Text} was treated as
# the return value, slowing things down significantly in some cases.
# Arguments: the parser instance and the chunk of character data.
sub Char {
    my $chunk = $_[1];

    if ($] < 5.008) {
        # Intended to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6.
        # NOTE(review): "use" is lexically scoped, so as written the pragma
        # only covers this (empty) block, not the append below - confirm
        # whether Perl 5.6 still needs supporting before changing it.
        use bytes;
    }

    $_[0]->{'Text'} .= $chunk;
    return undef;
}
220
221
222
# Writes one line to a file handle.
# Arguments: $filehandle - the handle (or handle name) to print to,
#            $line       - reference to an array of string fragments, which
#                          are concatenated without a separator.
# A newline is added after the concatenated fragments.
sub write_line {
    my ($filehandle, $line) = @_;

    my $text = join("", @{$line});
    print $filehandle $text, "\n";
}
227
# Splits a list of "name->shortname" strings (the format of the map entries
# in the build config) into a list of [name, shortname] pairs, in the
# original order.  Entries without "->" are skipped.
sub _parse_cfg_map {
    my ($entries) = @_;

    my @pairs = ();
    foreach my $entry (@$entries) {
        my ($name, $shortname) = $entry =~ /^(.*)\-\>(.*)$/;
        push @pairs, [$name, $shortname] if defined $name;
    }
    return @pairs;
}

# Writes the <indexStem> element if the build config defines an index stem.
sub _write_index_stem {
    my ($filehandle, $buildcfg) = @_;

    if (defined $buildcfg->{'indexstem'}) {
        write_line($filehandle, ["<indexStem name=\"", $buildcfg->{'indexstem'}, "\" />"]);
    }
}

# Create the buildConfig.xml file for a specific collection.
#
# Arguments:
#   $buildoutfile - destination file: buildConfig.xml
#   $buildcfg     - hash ref of build options.  Keys read here: buildtype,
#                   numdocs, indexstem, indexmap / indexfieldmap,
#                   stemindexes, maxnumeric, levelmap, textlevel,
#                   searchtype, languagemap, subcollectionmap
#   $collectcfg   - hash ref of collection settings.  Keys read here:
#                   defaultindex, defaultlevel, defaultlanguage, classify
#   $disable_OAI  - when 0 (or undefined) the OAIPMH serviceRack is written
#
# Dies if the output file cannot be opened or cannot be closed cleanly.
# Returns 1 on success.
#
# NOTE(review): values are written into the XML without escaping, so a
# name containing &, < or " would produce a malformed file - confirm
# whether callers guarantee XML-safe values.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_;

    my $out;
    if (!open($out, '>', $buildoutfile)) {
        my $reason = $!;
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die "buildConfigxml::write_cfg_file: $reason\n";
    }

    write_line($out, ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);

    # output building metadata to build config file
    my $buildtype = defined $buildcfg->{"buildtype"} ? $buildcfg->{"buildtype"} : "mgpp";
    my $numdocs = defined $buildcfg->{"numdocs"} ? $buildcfg->{"numdocs"} : "";
    # mgpp and lucene share the level and search type sections
    my $has_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    write_line($out, ["<metadataList>"]);
    write_line($out, ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
    write_line($out, ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
    write_line($out, ["</metadataList>"]);

    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    }
    elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # output serviceRackList
    write_line($out, ["<serviceRackList>"]);

    # This serviceRack enables the collection to provide the oai metadata
    # retrieve service.  It is written unless 'disable_OAI' was requested.
    if (!defined $disable_OAI || $disable_OAI == 0) {
        write_line($out, ["<serviceRack name=\"OAIPMH\">"]);
        _write_index_stem($out, $buildcfg);
        write_line($out, ["</serviceRack>"]);
    }

    # do the search service
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
    _write_index_stem($out, $buildcfg);

    # indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";

    if (defined $buildcfg->{$maptype}) {
        my @index_pairs = _parse_cfg_map($buildcfg->{$maptype});
        foreach my $pair (@index_pairs) {
            my ($name, $shortname) = @$pair;
            $indexmap->{$name} = $shortname;
            push @indexlist, $name;
        }
        # the first index is the default ...
        if (@index_pairs) {
            $defaultindex = $index_pairs[0]->[1];
        }
        # ... unless the user has assigned a default index that was
        # actually built (an unknown name must not blank the default)
        my $user_index = $collectcfg->{"defaultindex"};
        if (defined $user_index && defined $indexmap->{$user_index}) {
            $defaultindex = $indexmap->{$user_index};
        }
    }
    else {
        print STDERR "$maptype not defined\n";
    }

    # for each index in indexList, write them out
    write_line($out, ["<indexList>"]);
    foreach my $i (@indexlist) {
        my $index = $indexmap->{$i};
        write_line($out, ["<index name=\"", $i, "\" ", "shortname=\"", $index, "\" />"]);
    }
    write_line($out, ["</indexList>"]);

    write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        write_line($out, ["<indexOptionList>"]);

        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        write_line($out, ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        write_line($out, ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);

        write_line($out, ["</indexOptionList>"]);
    }

    # levelList
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    if ($has_levels) {
        if (defined $buildcfg->{'levelmap'}) {
            my @level_pairs = _parse_cfg_map($buildcfg->{'levelmap'});
            foreach my $pair (@level_pairs) {
                my ($name, $shortname) = @$pair;
                $levelmap->{$name} = $shortname;
                push @levellist, $name;
            }
            if (@level_pairs) {
                # let default search level follow the first level in the level list
                $default_search_level = $level_pairs[0]->[1];
                # retrieve/database levels may get modified later if text level is defined
                $default_retrieve_level = $level_pairs[0]->[1];
                $default_db_level = $level_pairs[0]->[1];
            }
        }

        # A default level assigned by the user overrides the search level
        # (the retrieve level stays the same), but only when it names a
        # level that exists; otherwise the default chosen above is kept
        # instead of being replaced by an empty value.
        my $user_level = $collectcfg->{"defaultlevel"};
        if (defined $user_level && defined $levelmap->{$user_level}) {
            $default_search_level = $levelmap->{$user_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if ($buildtype ne "mg") {
        write_line($out, ["<levelList>"]);
        foreach my $lv (@levellist) {
            my $level = $levelmap->{$lv};
            write_line($out, ["<level name=\"", $lv, "\" shortname=\"", $level, "\" />"]);
        }
        write_line($out, ["</levelList>"]);
    }

    if ($has_levels) {
        # add in defaultLevel as the same level as indexLevelList, making the reading job easier
        write_line($out, ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
        write_line($out, ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);

        # do searchTypeList
        write_line($out, ["<searchTypeList>"]);
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $s (@{$buildcfg->{"searchtype"}}) {
                write_line($out, ["<searchType name=\"", $s, "\" />"]);
            }
        }
        else {
            write_line($out, ["<searchType name=\"plain\" />"]);
            write_line($out, ["<searchType name=\"form\" />"]);
        }
        write_line($out, ["</searchTypeList>"]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        write_line($out, ["<indexLanguageList>"]);

        my @lang_pairs = _parse_cfg_map($buildcfg->{"languagemap"});
        foreach my $pair (@lang_pairs) {
            my ($name, $shortname) = @$pair;
            write_line($out, ["<indexLanguage name=\"", $name, "\" shortname=\"", $shortname, "\" />"]);
        }
        if (@lang_pairs) {
            $default_lang = $lang_pairs[0]->[0];       # name
            $default_lang_short = $lang_pairs[0]->[1]; # shortname
        }

        write_line($out, ["</indexLanguageList>"]);
        # now if the user has assigned a default language (as "en", "ru" etc.)
        # NOTE(review): only the name is overridden; the shortname stays
        # that of the first language - confirm this is intended.
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
        }
        write_line($out, ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared here because the retrieve service needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<indexSubcollectionList>"]);

        my $subcolmap = {};
        my @subcollist = ();
        my @subcol_pairs = _parse_cfg_map($buildcfg->{'subcollectionmap'});
        foreach my $pair (@subcol_pairs) {
            my ($name, $shortname) = @$pair;
            $subcolmap->{$name} = $shortname;
            push @subcollist, $name;
        }
        if (@subcol_pairs) {
            $default_subcol = $subcol_pairs[0]->[1];
        }
        foreach my $sl (@subcollist) {
            my $subcol = $subcolmap->{$sl};
            write_line($out, ["<indexSubcollection name=\"", $sl, "\" shortname=\"", $subcol, "\" />"]);
        }

        write_line($out, ["</indexSubcollectionList>"]);
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }

    # close off search service
    write_line($out, ["</serviceRack>"]);

    # do the retrieve service
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        write_line($out, ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }
    if ($buildtype eq "mg") {
        write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }

    _write_index_stem($out, $buildcfg);
    if ($has_levels) {
        write_line($out, ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
    }
    write_line($out, ["</serviceRack>"]);

    # do the browse service
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    my $classifiers = $collectcfg->{"classify"} || [];
    foreach my $cl (@$classifiers) {
        # every classifier consumes a number, including phind
        my $name = "CL$count";
        $count++;
        my $classname = $cl->[0];
        if (defined $classname && $classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        if (not $started_classifiers) {
            write_line($out, ["<serviceRack name=\"GS2Browse\">"]);
            _write_index_stem($out, $buildcfg);
            write_line($out, ["<classifierList>"]);
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if (defined $classname && $classname eq "DateList") {
            $content = "Date";
        }
        else {
            for (my $j = 0; $j < scalar(@$cl); $j++) {
                my $arg = $cl->[$j];
                next unless defined $arg;
                # an option given as the last argument has no value
                my $value = defined $cl->[$j+1] ? $cl->[$j+1] : '';
                if ($arg eq "-buttonname") {
                    $content = $value;
                    last;
                }
                elsif ($arg eq "-metadata") {
                    $content = $value;
                }
            }
        }
        write_line($out, ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
    }
    if ($started_classifiers) {
        # end the classifiers
        write_line($out, ["</classifierList>"]);
        # close off the Browse service
        write_line($out, ["</serviceRack>"]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        write_line($out, ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
    }

    write_line($out, ["</serviceRackList>"]);
    write_line($out, ["</buildConfig>"]);

    # Buffered write errors only surface at close, so check it.
    if (!close($out)) {
        my $reason = $!;
        print STDERR "buildConfigxml::write_cfg_file couldn't finish writing the build config file $buildoutfile\n";
        die "buildConfigxml::write_cfg_file: $reason\n";
    }
    return 1;
}
560
561
562#########################################################
563
5641;
Note: See TracBrowser for help on using the browser.