root/main/trunk/greenstone2/perllib/buildConfigxml.pm @ 22485

Revision 22485, 18.1 KB (checked in by ak19, 10 years ago)

1. Dr Bainbridge fixed the database perl modules to all have the method read_info_keys (which reads the keys from the database into a map), so that dbutil.pm can have the same as a generic method. 2. buildConfigxml.pm only writes out the defaultIndex if it is set (to prevent an Uninitialised Variable warning message from Perl).

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in buildConfig.xml
27# Note, only implemented the bits that are currently used, eg by incremental
28# build code.
# The resulting data is not a full representation of buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
# Wrapper that ensures the right version of XML::Parser is loaded given
# the version of Perl being used.  The interpreter version ($]) selects
# which perllib/cpan/perl-5.x directory under GSDLHOME is added to @INC.
BEGIN {
    # Perl 5.8.x is the starting assumption; adjusted below for other releases.
    my $cpan_subdir = "perl-5.8";

    if ($] >= 5.010) {
        $cpan_subdir = "perl-5.10";
    }
    elsif ($] < 5.008) {
        # assume perl 5.6
        $cpan_subdir = "perl-5.6";
    }
    elsif ($] == 5.008) {
        # exactly 5.8.0: still uses the perl-5.8 directory, but complain
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR "         Please upgrade to a newer version of Perl.\n";
    }

    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        # Appended with push (not unshift) so that an XML::Parser already
        # reachable through @INC is used by default.
        push(@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$cpan_subdir");
    }
}
66
67use XML::Parser;
68
69
# Resolves the name discrepancy between gs2 and gs3: maps the metadata
# names matched by $stringexp onto the keys used in $data.
my $nameMap = {
    "buildType" => "buildtype",
    "numDocs"   => "numdocs",
};

# The hash structure that the parser callbacks fill in and that
# read_cfg_file returns.
my $data = {};

# Value of the 'name' attribute of the element whose text should be stored
# next (set in StartTag, consumed and cleared in Text).
my $currentLocation = "";

# Pattern selecting the 'name' attribute values whose element text is kept.
my $stringexp = q/^(buildType|numDocs)$/;

# Key of $data that index entries are appended to ("indexmap" or
# "indexfieldmap"); chosen when the indexList element is seen.
my $indexmap_name = "";

# True when index names must also be collected in $data->{'indexfields'}.
my $haveindexfields = 0;
85
# Reads in a build configuration file (buildConfig.xml) and returns the parts
# of it that this module understands as a hash reference using gs2-style
# key names (numdocs, buildtype, indexmap / indexfieldmap, indexfields).
#
#   $filename - path of the file to read; must end in "buildConfig.xml"
#
# Returns undef if $filename is not an existing buildConfig.xml file.
# Returns an empty hash reference (after printing a message to STDERR) if
# the file cannot be opened for reading.  Errors raised by the XML parser
# itself are not caught here.
sub read_cfg_file {
    my ($filename) = @_;

    # Start every read from a clean slate.  The parser callbacks keep their
    # state in file-level variables, so anything left over from an earlier
    # call would otherwise leak into this one.
    $data = {};
    $currentLocation = "";
    $indexmap_name = "";
    $haveindexfields = 0;

    if (!defined $filename || $filename !~ /buildConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # The StartTag/EndTag/Text callbacks are found by the Stream style in the
    # calling package; Char and Doctype are installed explicitly.
    my %parser_args = (
        'Style'    => 'Stream',
        'Handlers' => {
            'Char'    => \&Char,
            'Doctype' => \&Doctype,
        },
    );
    if ($] >= 5.008) {
        # Perl 5.8 and later
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # Probe readability first so that an unreadable file yields a message and
    # an empty result instead of an exception from parsefile().  The probe
    # handle is lexical and is closed before parsing starts, so nothing stays
    # open if the parser dies.
    my $probe_fh;
    if (!open($probe_fh, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
        return $data;
    }
    close($probe_fh);

    $parser->parsefile($filename);

    #&Display;
    return $data;
}
125
# Stream-style callback invoked for every opening tag.
#
#   $expat   - the parser object (unused)
#   $element - name of the element being opened
#
# The attributes of the element are read from the %_ hash.  Exactly one of
# the branches below applies to any given element, hence the if/elsif chain.
sub StartTag {
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    # metadataList entries (numDocs, buildType): remember which one we are
    # in; the value itself is picked up later in sub Text.
    if (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
    }

    # indexList: decide where the index entries that follow are stored -
    # indexmap (mg) or indexfieldmap plus indexfields (mgpp/lucene).
    elsif ($element eq 'indexList') {
        # This assumes that the build type has been read already, which is
        # the order in which write_cfg_file saves the file.
        my $buildtype = $data->{'buildtype'};
        if (defined $buildtype && $buildtype eq "mg") {
            $indexmap_name = "indexmap";
            # clear the flag in case an earlier parse left it set
            $haveindexfields = 0;
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }

    # index: store each index in the map.  The element name is compared
    # exactly so that other elements carrying both a name and a shortname
    # (indexLanguage, indexSubcollection) are not mistaken for indexes.
    # Entries seen before any indexList are ignored, as there is no target
    # list to put them in yet.
    elsif ($element eq 'index') {
        if ($indexmap_name ne "" && defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
178
# Stream-style callback invoked for every closing tag.
#
#   $expat   - the parser object (unused)
#   $element - name of the element being closed
#
# Forgets any pending metadata location.  The Stream style delivers an
# element's text (via sub Text) before calling EndTag, so a non-empty value
# has already been stored by now.  Without this reset an empty element such
# as <metadata name="numDocs"></metadata> would leave the location set and
# the whitespace following it would be stored as the value.
sub EndTag {
    my ($expat, $element) = @_;

    $currentLocation = "";
}
182
# Stream-style callback invoked with a run of character data in $_.
# When StartTag has flagged the enclosing element as one of the wanted
# metadata entries (numDocs, buildType), the text is stored in $data under
# the corresponding gs2 key and the flag is cleared.
sub Text {
    if (defined $currentLocation && $currentLocation =~ /$stringexp/) {
        #print $currentLocation;
        my $gs2_key = $nameMap->{$currentLocation};
        $data->{$gs2_key} = $_;
        undef $currentLocation;
    }
}
194
# Debugging aid: prints to the currently selected output handle every
# entry of $data that has been filled in.
sub Display {
    # plain values
    foreach my $entry (["NumDocs", 'numdocs'], ["BuildType", 'buildtype']) {
        my ($label, $key) = @$entry;
        if (defined $data->{$key}) {
            print $label . " = " . $data->{$key} . "\n";
        }
    }

    # list values, printed space-separated
    foreach my $entry (["IndexMap", 'indexmap'],
                       ["IndexFieldMap", 'indexfieldmap'],
                       ["IndexFields", 'indexfields']) {
        my ($label, $key) = @$entry;
        if (defined $data->{$key}) {
            print $label . " = " . join(" ", @{$data->{$key}}) . "\n";
        }
    }
}
205
# Doctype handler installed on the parser by read_cfg_file (the original
# author questioned whether it is ever triggered).
#
#   $expat    - the parser object (unused)
#   $name     - document type name from the DOCTYPE declaration
#   $sysid, $pubid, $internal - remaining DOCTYPE details (unused)
#
# Dies with a descriptive message unless the document type is buildConfig.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    if (!defined $name || $name !~ /^buildConfig$/) {
        my $found = defined $name ? $name : '(undefined)';
        die "buildConfigxml::Doctype: unexpected document type '$found' (expected 'buildConfig')\n";
    }
}
212
# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
#
#   $expat - the parser object; the text is accumulated in its 'Text' slot
#   $chars - the character data just parsed
#
# Always returns undef.
sub Char {
    my ($expat, $chars) = @_;

    if ($] < 5.008) {
        use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
        # NOTE(review): 'use' is lexically scoped, so the pragma covers only
        # this (otherwise empty) block - confirm that is what was intended.
    }

    $expat->{'Text'} .= $chars;
    return undef;
}
223
224
225
# Writes one line to a file handle.
#
#   $filehandle - handle (or handle name) to print to
#   $line       - array ref of string fragments; they are concatenated
#                 without a separator and followed by a newline
sub write_line {
    my ($filehandle, $line) = @_;

    my $text = join("", @{$line});
    print {$filehandle} $text, "\n";
}
230
# Splits a list of "name->shortname" strings into [name, shortname] pairs,
# keeping the original order.  Undefined entries and entries that are not
# of that form are skipped.
#
#   $entries - array ref of strings (may be undef)
#
# Returns the list of pairs (empty when there is nothing usable).
sub _split_map_entries {
    my ($entries) = @_;

    my @pairs = ();
    return @pairs unless defined $entries;

    foreach my $entry (@$entries) {
        next unless defined $entry;
        my ($name, $short) = $entry =~ /^(.*)\-\>(.*)$/;
        next unless defined $name;
        push @pairs, [$name, $short];
    }
    return @pairs;
}

# Create the buildConfig.xml file for a specific collection.
#
# This sub is called in make_auxiliary_files() in basebuilder.pm.
#
#   $buildoutfile - destination file: buildConfig.xml (overwritten)
#   $buildcfg     - hash ref of build options.  Keys read here: buildtype,
#                   numdocs, indexstem, infodbtype, indexmap / indexfieldmap,
#                   stemindexes, maxnumeric, levelmap, textlevel, searchtype,
#                   languagemap, subcollectionmap
#   $collectcfg   - hash ref of collection configuration.  Keys read here:
#                   defaultindex, defaultlevel, defaultlanguage, classify
#
# Dies if the destination file cannot be opened or cannot be closed cleanly.
#
# NOTE(review): all values are written into the XML verbatim, without any
# escaping - confirm that callers never pass &, < or " in them.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg) = @_;

    my $out;
    if (!open($out, '>', $buildoutfile)) {
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die "buildConfigxml::write_cfg_file: cannot open $buildoutfile for writing: $!\n";
    }

    # Writes its arguments, concatenated, as one line of the output file.
    my $emit = sub { print $out join("", @_), "\n"; };

    # Values used in several places below.
    my $buildtype  = defined $buildcfg->{"buildtype"} ? $buildcfg->{"buildtype"} : "mgpp";
    my $numdocs    = defined $buildcfg->{"numdocs"} ? $buildcfg->{"numdocs"} : "";
    my $indexstem  = $buildcfg->{'indexstem'};
    my $infodbtype = $buildcfg->{'infodbtype'};
    my $is_mg      = ($buildtype eq "mg");
    my $has_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    # indexStem / databaseType are repeated inside every service rack.
    my $emit_indexstem = sub {
        $emit->("<indexStem name=\"", $indexstem, "\" />") if defined $indexstem;
    };
    my $emit_dbtype = sub {
        $emit->("<databaseType name=\"", $infodbtype, "\" />") if defined $infodbtype;
    };

    $emit->("<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">");

    # output building metadata to build config file
    $emit->("<metadataList>");
    $emit->("<metadata name=\"numDocs\">", $numdocs, "</metadata>");
    $emit->("<metadata name=\"buildType\">", $buildtype, "</metadata>");
    if (defined $indexstem) {
        $emit->("<metadata name=\"indexStem\">", $indexstem, "</metadata>");
    }
    if (defined $infodbtype) {
        $emit->("<metadata name=\"infodbType\">", $infodbtype, "</metadata>");
    }
    $emit->("</metadataList>");

    my $service_type = "MGPP";
    if ($is_mg) {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # output serviceRackList
    $emit->("<serviceRackList>");

    # ---- the search service ----
    $emit->("<serviceRack name=\"GS2", $service_type, "Search\">");
    $emit_indexstem->();
    $emit_dbtype->();

    # indexes: %index_short_of maps index name to shortname, @indexlist
    # keeps the order of the indexes
    my %index_short_of = ();
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = $is_mg ? "indexmap" : "indexfieldmap";

    #map {print $_."\n"} keys %$buildcfg;

    if (defined $buildcfg->{$maptype}) {
        my @index_pairs = _split_map_entries($buildcfg->{$maptype});
        foreach my $pair (@index_pairs) {
            my ($index_name, $index_short) = @$pair;
            $index_short_of{$index_name} = $index_short;
            push @indexlist, $index_name;
        }
        # the first index is the default ...
        $defaultindex = $index_pairs[0]->[1] if @index_pairs;

        # ... unless the user has assigned a default index that actually
        # exists; an unknown name must not wipe out the default
        my $user_index = $collectcfg->{"defaultindex"};
        if (defined $user_index && defined $index_short_of{$user_index}) {
            $defaultindex = $index_short_of{$user_index};
        }
    } else {
        print STDERR "$maptype not defined\n";
    }

    # for each index in indexList, write them out
    $emit->("<indexList>");
    foreach my $index_name (@indexlist) {
        $emit->("<index name=\"", $index_name, "\" ", "shortname=\"", $index_short_of{$index_name}, "\" />");
    }
    $emit->("</indexList>");

    #$defaultindex = "ZZ" if (!$defaultindex); # index allfields by default
    if ($defaultindex) {
        $emit->("<defaultIndex shortname=\"", $defaultindex, "\" />");
    }

    # do indexOptionList
    if ($is_mg || $buildtype eq "mgpp") {
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }

        $emit->("<indexOptionList>");
        $emit->("<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />");
        $emit->("<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />");
        $emit->("</indexOptionList>");
    }

    # levelList
    my %level_short_of = ();
    my @levellist = ();
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    if ($has_levels) {
        my @level_pairs = _split_map_entries($buildcfg->{'levelmap'});
        foreach my $pair (@level_pairs) {
            my ($level_name, $level_short) = @$pair;
            $level_short_of{$level_name} = $level_short;
            push @levellist, $level_name;
        }
        if (@level_pairs) {
            # let all default levels follow the first level in the level
            # list; retrieve/database levels may get modified below if a
            # text level is defined
            $default_search_level = $level_pairs[0]->[1];
            $default_retrieve_level = $default_search_level;
            $default_db_level = $default_search_level;
        }

        # The default level assigned by the user overrides the search level
        # only (the retrieve level stays the same), and only when it is set
        # and names a known level - otherwise the default chosen above would
        # be replaced by an undefined value.
        my $user_level = $collectcfg->{"defaultlevel"};
        if (defined $user_level && defined $level_short_of{$user_level}) {
            $default_search_level = $level_short_of{$user_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if (!$is_mg) {
        $emit->("<levelList>");
        foreach my $level_name (@levellist) {
            $emit->("<level name=\"", $level_name, "\" shortname=\"", $level_short_of{$level_name}, "\" />");
        }
        $emit->("</levelList>");
    }
    if ($has_levels) {
        # defaultLevel sits at the same level as the level list, making the
        # reading job easier
        $emit->("<defaultLevel shortname=\"", $default_search_level, "\" />");
        $emit->("<defaultDBLevel shortname=\"", $default_db_level, "\" />");

        # do searchTypeList
        $emit->("<searchTypeList>");
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $searchtype (@{$buildcfg->{"searchtype"}}) {
                $emit->("<searchType name=\"", $searchtype, "\" />");
            }
        } else {
            $emit->("<searchType name=\"plain\" />");
            $emit->("<searchType name=\"form\" />");
        }
        $emit->("</searchTypeList>");
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang = "";        # name
    my $default_lang_short = "";  # shortname
    if (defined $buildcfg->{"languagemap"}) {
        my %lang_short_of = ();
        my @lang_pairs = _split_map_entries($buildcfg->{"languagemap"});

        $emit->("<indexLanguageList>");
        foreach my $pair (@lang_pairs) {
            my ($lang_name, $lang_short) = @$pair;
            $lang_short_of{$lang_name} = $lang_short;
            $emit->("<indexLanguage name=\"", $lang_name, "\" shortname=\"", $lang_short, "\" />");
        }
        $emit->("</indexLanguageList>");

        # the first language is the default ...
        if (@lang_pairs) {
            ($default_lang, $default_lang_short) = @{$lang_pairs[0]};
        }
        # ... unless the user has assigned a default language (as "en",
        # "ru" etc.); keep the shortname in step with it when it is known
        my $user_lang = $collectcfg->{"defaultlanguage"};
        if (defined $user_lang) {
            $default_lang = $user_lang;
            if (defined $lang_short_of{$user_lang}) {
                $default_lang_short = $lang_short_of{$user_lang};
            }
        }
        $emit->("<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />");
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared here because the retrieve service needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        my %subcol_short_of = ();
        my @subcollist = ();
        my @subcol_pairs = _split_map_entries($buildcfg->{'subcollectionmap'});
        foreach my $pair (@subcol_pairs) {
            my ($subcol_name, $subcol_short) = @$pair;
            $subcol_short_of{$subcol_name} = $subcol_short;
            push @subcollist, $subcol_name;
        }
        $default_subcol = $subcol_pairs[0]->[1] if @subcol_pairs;

        $emit->("<indexSubcollectionList>");
        foreach my $subcol_name (@subcollist) {
            $emit->("<indexSubcollection name=\"", $subcol_name, "\" shortname=\"", $subcol_short_of{$subcol_name}, "\" />");
        }
        $emit->("</indexSubcollectionList>");
        $emit->("<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />");
    }

    # close off search service
    $emit->("</serviceRack>");

    # ---- the retrieve service ----
    $emit->("<serviceRack name=\"GS2", $service_type, "Retrieve\">");

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # NOTE(review): this writes the language *name* into the shortname
        # attribute, exactly as the code always has - confirm whether the
        # shortname was intended.
        $emit->("<defaultIndexLanguage shortname=\"", $default_lang, "\" />");
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        $emit->("<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />");
    }
    if ($is_mg) {
        $emit->("<defaultIndex shortname=\"", $defaultindex, "\" />");
    }

    $emit_indexstem->();
    if ($has_levels) {
        $emit->("<defaultLevel shortname=\"", $default_retrieve_level, "\" />");
    }
    $emit_dbtype->();

    $emit->("</serviceRack>");

    # ---- the browse service ----
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    my $classifiers = $collectcfg->{"classify"};
    foreach my $cl (@{ defined $classifiers ? $classifiers : [] }) {
        # every classifier consumes a CL number, phind included
        my $name = "CL$count";
        $count++;

        my $classname = defined $cl->[0] ? $cl->[0] : "";
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        # the Browse rack is only opened once a non-phind classifier exists
        if (not $started_classifiers) {
            $emit->("<serviceRack name=\"GS2Browse\">");
            $emit_indexstem->();
            $emit_dbtype->();
            $emit->("<classifierList>");
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            foreach my $j (0 .. $#$cl) {
                my $arg = $cl->[$j];
                next unless defined $arg;
                if ($arg eq "-buttonname") {
                    # -buttonname wins outright
                    $content = $cl->[$j+1];
                    last;
                } elsif ($arg eq "-metadata") {
                    # remembered, but a later -buttonname may still override
                    $content = $cl->[$j+1];
                }
            }
            # an option given as the last argument has no value
            $content = '' unless defined $content;
        }
        $emit->("<classifier name=\"", $name, "\" content=\"", $content, "\" />");
    }
    if ($started_classifiers) {
        # end the classifiers
        $emit->("</classifierList>");
        # close off the Browse service
        $emit->("</serviceRack>");
    }

    # the phind classifier is a separate service
    if ($phind) {
        $emit->("<serviceRack name=\"PhindPhraseBrowse\" />");
    }

    $emit->("</serviceRackList>");
    $emit->("</buildConfig>");

    # buffered write errors only surface here
    close($out)
        or die "buildConfigxml::write_cfg_file: error closing $buildoutfile: $!\n";
}
575
576
577#########################################################
578
5791;
Note: See TracBrowser for help on using the browser.