root/main/trunk/greenstone2/perllib/buildConfigxml.pm @ 21439

Revision 21439, 18.6 KB (checked in by davidb, 10 years ago)

Support for 'infodbtype' added

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in buildConfig.xml
27# Note, only implemented the bits that are currently used, eg by incremental
28# build code.
29# The resulting data is not a full representation of buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
37# Wrapper that ensures the right version of XML::Parser is loaded given
38# the version of Perl being used.  Need to distinguish between Perl 5.6 and
39# Perl 5.8
sub BEGIN {
    # $] is the version number of the running perl.  Everything older than
    # 5.8 uses the 5.6 build of the bundled modules; everything else uses the
    # 5.8 build.
    my $cpan_subdir = ($] < 5.008) ? "perl-5.6" : "perl-5.8";

    # Exactly 5.8.0 still works with the 5.8 modules, but is worth a warning.
    if ($] == 5.008) {
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR "         Please upgrade to a newer version of Perl.\n";
    }

    # Nothing is added to @INC when GSDLOS says we are on Windows.  Elsewhere
    # the bundled directory goes on the END of @INC, so that an XML::Parser
    # already installed on the system is still found first.
    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        push(@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$cpan_subdir");
    }
}
63
64use XML::Parser;
65
66
# Metadata names as they are spelt in buildConfig.xml (gs3), mapped to the
# key names used on the gs2 side.
my $nameMap = {
    "numDocs"   => "numdocs",
    "buildType" => "buildtype",
};

# The hash that sub read_cfg_file fills in and returns.
my $data = {};

# The 'name' attribute values below are unique, so they are used to recognise
# the elements whose text is wanted.  $currentLocation remembers which one the
# parser is currently inside.
my $stringexp = q/^(buildType|numDocs)$/;
my $currentLocation = "";

# Which key of $data the <index> entries are being collected under, and
# whether the plain index names are collected in 'indexfields' as well.
my $indexmap_name = "";
my $haveindexfields = 0;
82
# Reads a build configuration file, buildConfig.xml, into a hash intended to
# comply with the structure used by gs2 (i.e. the one read in by
# &cfgread::read_cfg_file).
#
# Argument:
#   $filename - path of the file; its name must end in "buildConfig.xml".
#
# Returns:
#   undef            if the name does not end in buildConfig.xml or the path
#                    is not a plain file;
#   a hash reference otherwise.  It is empty when the file could not be
#                    opened (a message is printed to STDERR in that case).
#                    The parser handlers in this package fill in 'numdocs',
#                    'buildtype' and 'indexmap' (mg) or 'indexfieldmap' plus
#                    'indexfields' (other build types).
#
# Errors raised by XML::Parser while parsing are not caught here.
sub read_cfg_file {
    my ($filename) = @_;

    # The handlers communicate through file-scoped variables.  Reset ALL of
    # them, not just $data, so that nothing learnt from a previously read
    # file leaks into this one.
    $data = {};
    $currentLocation = "";
    $indexmap_name = "";
    $haveindexfields = 0;

    if (!defined $filename || $filename !~ /buildConfig\.xml$/ || !-f $filename) {
        # kept as an explicit undef: callers receive exactly one value
        return undef;
    }

    # create the XML::Parser object for parsing the buildConfig.xml file
    my %parser_args = ('Style' => 'Stream',
                       'Handlers' => {'Char' => \&Char,
                                      'Doctype' => \&Doctype
                                     });
    if ($] >= 5.008) {
        # Perl 5.8 and later; Perl 5.6 is not given a protocol encoding
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # Probe readability first so that an unreadable file yields a message and
    # an empty result.  The probe handle is closed before parsing starts, so
    # it cannot be left open if the parser dies.
    my $probe;
    if (!open($probe, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
        return $data;
    }
    close($probe);

    $parser->parsefile($filename);

    #&Display;
    return $data;
}
122
# Start-tag handler for XML::Parser's Stream style.
#
# Arguments: ($expat, $element) - the parser object and the element name.
# The element's attributes are read from the %_ hash.
#
# Effects on the file-scoped state:
#   - an element whose 'name' attribute is numDocs or buildType is remembered
#     in $currentLocation so that sub Text can store its text;
#   - <indexList> selects which key of $data the indexes are collected under
#     and creates the arrays;
#   - <index name=".." shortname=".."> appends "name->shortname" to that
#     array (and the name to 'indexfields' when that list is in use).
# The three cases are mutually exclusive, hence the if/elsif chain.
sub StartTag {
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    # handling block metadataList
    if (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
        # the value will be retrieved later in Text sub
    }

    # handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene)
    elsif ($element eq 'indexList') {
        # set up the data arrays
        # this assumes that the build type has been read already, which is
        # currently the order we save the file in.
        my $buildtype = $data->{'buildtype'};
        if (defined $buildtype && $buildtype eq "mg") {
            $indexmap_name = "indexmap";
            # clear the flag explicitly: it may still be set from a file
            # read earlier
            $haveindexfields = 0;
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }

    # Exact comparison on purpose: a substring match on "index" would also
    # pick up <indexLanguage> and <indexSubcollection>, which carry both a
    # name and a shortname and would end up in the index map.
    elsif ($element eq 'index') {
        # store each index in the map (only once an <indexList> has said
        # which map that is)
        if ($indexmap_name ne "" && defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
175
# End-tag handler.  Nothing is recorded when an element closes; the
# arguments are unpacked and otherwise left alone.
sub EndTag {
    my ($parser, $tag_name) = @_;
}
179
# Text handler: $_ holds the text.  When sub StartTag has flagged a numDocs
# or buildType element, store the text under the matching gs2 key and clear
# the flag; in every other case do nothing.
sub Text {
    return unless defined $currentLocation;

    # Handling block metadataList (numDocs, buildType)
    return unless $currentLocation =~ /$stringexp/;

    my $gs2_key = $nameMap->{$currentLocation};
    $data->{$gs2_key} = $_;
    undef $currentLocation;
}
191
192# This sub is for debugging purposes
# Prints whatever has been collected in $data to STDOUT, one "Label = value"
# line per entry that is defined.
sub Display {

    # plain values
    foreach my $entry (["NumDocs", 'numdocs'], ["BuildType", 'buildtype']) {
        my ($label, $key) = @$entry;
        next unless defined $data->{$key};
        print $label . " = " . $data->{$key} . "\n";
    }

    # array values, space separated
    foreach my $entry (["IndexMap", 'indexmap'],
                       ["IndexFieldMap", 'indexfieldmap'],
                       ["IndexFields", 'indexfields']) {
        my ($label, $key) = @$entry;
        next unless defined $data->{$key};
        print $label . " = " . join(" ", @{$data->{$key}}) . "\n";
    }

}
202
203# is this actually used??
# Doctype handler: rejects any document whose declared document type is not
# buildConfig.
#
# Arguments: ($expat, $name, $sysid, $pubid, $internal); only $name is used.
# Dies, naming the offending document type, when $name is missing or is not
# buildConfig.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    if (!defined $name || $name !~ /^buildConfig$/) {
        my $found = defined $name ? $name : '';
        die "buildConfigxml: unexpected document type '$found' (expected 'buildConfig')";
    }
}
209
210# This Char function overrides the one in XML::Parser::Stream to overcome a
211# problem where $expat->{Text} is treated as the return value, slowing
212# things down significantly in some cases.
sub Char {
    # Arguments: ($expat, $text).  The text is appended to $expat->{'Text'}.
    #
    # There used to be a "use bytes" here, wrapped in a check for Perl 5.6.
    # "use bytes" is a lexically scoped pragma that is resolved when the code
    # is compiled, so placed inside that otherwise empty if-block it never
    # applied to the statement below on any version of Perl.  It has been
    # dropped; the behaviour is unchanged.
    $_[0]->{'Text'} .= $_[1];

    # Return something cheap on purpose, so the accumulated text is not the
    # value of the sub.
    return undef;
}
220
221
222
# Writes one line to $filehandle: the elements of the array referenced by
# $line run together without a separator, followed by a newline.
sub write_line {
    my ($filehandle, $line) = @_;
    my $text = join("", @{$line});
    print {$filehandle} $text, "\n";
}
227
# Splits one "name->shortname" map entry into its two halves.  Returns an
# empty list when the entry is undefined or contains no "->".
sub _split_map_entry {
    my ($entry) = @_;
    return () unless defined $entry;
    my ($name, $shortname) = $entry =~ /^(.*)\-\>(.*)$/;
    return () unless defined $name;
    return ($name, $shortname);
}

# Parses a list of "name->shortname" entries.  Returns the names in their
# original order, a hash mapping each name to its shortname, and the
# shortname of the first well-formed entry (undef if there is none).
# Malformed entries are skipped.
sub _parse_name_map {
    my ($entries) = @_;
    my @names = ();
    my %shortname_of = ();
    my $first_shortname;
    foreach my $entry (@$entries) {
        my ($name, $shortname) = _split_map_entry($entry);
        next unless defined $name;
        $first_shortname = $shortname unless @names;
        $shortname_of{$name} = $shortname;
        push @names, $name;
    }
    return (\@names, \%shortname_of, $first_shortname);
}

# Writes the <indexStem> element if the build configuration has an indexstem.
sub _write_index_stem {
    my ($fh, $buildcfg) = @_;
    if (defined $buildcfg->{'indexstem'}) {
        write_line($fh, ["<indexStem name=\"", $buildcfg->{'indexstem'}, "\" />"]);
    }
}

# Writes the <databaseType> element if the build configuration has an
# infodbtype.
sub _write_database_type {
    my ($fh, $buildcfg) = @_;
    if (defined $buildcfg->{'infodbtype'}) {
        write_line($fh, ["<databaseType name=\"", $buildcfg->{'infodbtype'}, "\" />"]);
    }
}

# Create the buildConfig.xml file for a specific collection.
#
# This sub is called in make_auxiliary_files() in basebuilder.pm.
#
# Arguments:
#   $buildoutfile - destination file: buildConfig.xml
#   $buildcfg     - hash of build options.  Keys read here: buildtype,
#                   numdocs, indexstem, infodbtype, indexmap / indexfieldmap,
#                   stemindexes, maxnumeric, levelmap, textlevel, searchtype,
#                   languagemap, subcollectionmap
#   $collectcfg   - hash of collection configuration.  Keys read here:
#                   defaultindex, defaultlevel, defaultlanguage, classify
#   $disable_OAI  - the OAIPMH service rack is written when this is 0
#                   (or not supplied)
#
# Dies if the destination file cannot be opened for writing.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_;

    open(my $fh, '>', $buildoutfile)
        or die "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile: $!\n";

    write_line($fh, ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);

    # output building metadata to build config file
    my $buildtype = "mgpp";
    if (defined $buildcfg->{"buildtype"}) {
        $buildtype = $buildcfg->{"buildtype"};
    }
    my $numdocs;
    if (defined $buildcfg->{"numdocs"}) {
        $numdocs = $buildcfg->{"numdocs"};
    }
    write_line($fh, ["<metadataList>"]);
    write_line($fh, ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
    write_line($fh, ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
    write_line($fh, ["</metadataList>"]);

    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # levels (and search types) only exist for these two build types
    my $uses_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    # output serviceRackList
    write_line($fh, ["<serviceRackList>"]);

    # This serviceRack enables the collection to provide the oai metadata
    # retrieve service, which is served by the OAIPMH.java class.  It is
    # written unless 'disable_OAI' was checked in the GLI (or given on the
    # command line).  There are also other configurations in OAIConfig.xml.
    if (!defined $disable_OAI || $disable_OAI == 0) {
        write_line($fh, ["<serviceRack name=\"OAIPMH\">"]);
        _write_index_stem($fh, $buildcfg);
        _write_database_type($fh, $buildcfg);
        write_line($fh, ["</serviceRack>"]);
    }

    # do the search service
    write_line($fh, ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
    _write_index_stem($fh, $buildcfg);
    _write_database_type($fh, $buildcfg);

    # indexes
    my $indexmap = {};     # maps index name to shortname
    my @indexlist = ();    # keeps the order for indexes
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";

    if (defined $buildcfg->{$maptype}) {
        my ($names, $map, $first_shortname) = _parse_name_map($buildcfg->{$maptype});
        @indexlist = @$names;
        $indexmap = $map;
        $defaultindex = $first_shortname if defined $first_shortname;

        # now if the user has assigned a default index, we use it - but only
        # when it really is one of the built indexes; otherwise the first
        # index stays the default rather than an empty shortname
        my $user_index = $collectcfg->{"defaultindex"};
        if (defined $user_index && defined $indexmap->{$user_index}) {
            $defaultindex = $indexmap->{$user_index};
        }
    } else {
        print STDERR "$maptype not defined\n";
    }

    # for each index in indexList, write them out
    write_line($fh, ["<indexList>"]);
    foreach my $i (@indexlist) {
        write_line($fh, ["<index name=\"", $i, "\" ", "shortname=\"", $indexmap->{$i}, "\" />"]);
    }
    write_line($fh, ["</indexList>"]);

    write_line($fh, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        write_line($fh, ["<indexOptionList>"]);

        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        write_line($fh, ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        write_line($fh, ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);

        write_line($fh, ["</indexOptionList>"]);
    }

    # levelList
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    if ($uses_levels) {
        if (defined $buildcfg->{'levelmap'}) {
            my ($names, $map, $first_shortname) = _parse_name_map($buildcfg->{'levelmap'});
            @levellist = @$names;
            $levelmap = $map;
            if (defined $first_shortname) {
                # let default search level follow the first level in the level list
                $default_search_level = $first_shortname;
                # retrieve/database levels may get modified later if text level is defined
                $default_retrieve_level = $first_shortname;
                $default_db_level = $first_shortname;
            }
        }

        # The default level assigned by the user is honoured for searching
        # (the retrieve level stays the same).  It must be both set and one
        # of the built levels; otherwise the default chosen above is kept
        # instead of being overwritten with nothing.
        my $user_level = $collectcfg->{"defaultlevel"};
        if (defined $user_level && defined $levelmap->{$user_level}) {
            $default_search_level = $levelmap->{$user_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if ($buildtype ne "mg") {
        write_line($fh, ["<levelList>"]);
        foreach my $lv (@levellist) {
            write_line($fh, ["<level name=\"", $lv, "\" shortname=\"", $levelmap->{$lv}, "\" />"]);
        }
        write_line($fh, ["</levelList>"]);
    }

    # add in defaultLevel as the same level as indexLevelList, making the reading job easier
    if ($uses_levels) {
        write_line($fh, ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
        write_line($fh, ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);
    }

    # do searchTypeList
    if ($uses_levels) {
        write_line($fh, ["<searchTypeList>"]);
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $s (@{$buildcfg->{"searchtype"}}) {
                write_line($fh, ["<searchType name=\"", $s, "\" />"]);
            }
        } else {
            write_line($fh, ["<searchType name=\"plain\" />"]);
            write_line($fh, ["<searchType name=\"form\" />"]);
        }
        write_line($fh, ["</searchTypeList>"]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        write_line($fh, ["<indexLanguageList>"]);

        my $first = 1;
        foreach my $l (@{$buildcfg->{"languagemap"}}) {
            my ($k, $v) = _split_map_entry($l);
            next unless defined $k;

            write_line($fh, ["<indexLanguage name=\"", $k, "\" shortname=\"", $v, "\" />"]);
            if ($first) {
                $default_lang = $k; #name
                $default_lang_short = $v; #shortname
                $first = 0;
            }
        }

        write_line($fh, ["</indexLanguageList>"]);
        # now if the user has assigned a default language (as "en", "ru" etc.)
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
        }
        write_line($fh, ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared at sub scope: the retrieve service needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($fh, ["<indexSubcollectionList>"]);

        my ($subcollist, $subcolmap, $first_shortname) = _parse_name_map($buildcfg->{'subcollectionmap'});
        $default_subcol = $first_shortname if defined $first_shortname;
        foreach my $sl (@$subcollist) {
            write_line($fh, ["<indexSubcollection name=\"", $sl, "\" shortname=\"", $subcolmap->{$sl}, "\" />"]);
        }

        write_line($fh, ["</indexSubcollectionList>"]);
        write_line($fh, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }

    # close off search service
    write_line($fh, ["</serviceRack>"]);

    # do the retrieve service
    write_line($fh, ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # NOTE(review): this puts the language *name* into a shortname
        # attribute, whereas the search service above writes
        # $default_lang_short there - confirm which one is intended.
        write_line($fh, ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($fh, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }
    if ($buildtype eq "mg") {
        write_line($fh, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }

    _write_index_stem($fh, $buildcfg);
    if ($uses_levels) {
        write_line($fh, ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
    }
    _write_database_type($fh, $buildcfg);

    write_line($fh, ["</serviceRack>"]);

    # do the browse service
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    my $classifiers = $collectcfg->{"classify"};
    $classifiers = [] unless defined $classifiers;
    foreach my $cl (@$classifiers) {
        # every classifier takes a number, the phind one included
        my $name = "CL$count";
        $count++;

        my $classname = $cl->[0];
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        # the browse rack is only opened once a non-phind classifier turns up
        if (not $started_classifiers) {
            write_line($fh, ["<serviceRack name=\"GS2Browse\">"]);
            _write_index_stem($fh, $buildcfg);
            _write_database_type($fh, $buildcfg);
            write_line($fh, ["<classifierList>"]);
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            my $last_pos = $#{$cl};
            foreach my $j (0 .. $last_pos) {
                my $arg = $cl->[$j];
                next unless defined $arg;
                # an option given as the very last argument has no value
                my $has_value = ($j < $last_pos && defined $cl->[$j+1]);
                if ($arg eq "-buttonname") {
                    $content = $cl->[$j+1] if $has_value;
                    last;
                } elsif ($arg eq "-metadata") {
                    $content = $cl->[$j+1] if $has_value;
                }
            }
        }
        write_line($fh, ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
    }
    if ($started_classifiers) {
        # end the classifiers
        write_line($fh, ["</classifierList>"]);
        # close off the Browse service
        write_line($fh, ["</serviceRack>"]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        write_line($fh, ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
    }

    write_line($fh, ["</serviceRackList>"]);
    write_line($fh, ["</buildConfig>"]);

    # buffered write errors only show up when the handle is closed
    if (!close($fh)) {
        print STDERR "buildConfigxml::write_cfg_file: error closing $buildoutfile: $!\n";
    }
}
577
578
579#########################################################
580
5811;
Note: See TracBrowser for help on using the browser.