root/main/trunk/greenstone2/perllib/buildConfigxml.pm @ 23895

Revision 23895, 17.1 KB (checked in by sjm84, 9 years ago)

Modified several Perl files to merge the locations where XML::Parser checked for the current version of Perl into one location. Also tidied up several locations where a difference was specified between 5.6 and 5.8+ to do with ProtocolEncoding being used to initialise an XML::Parser. Given the recent "

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# buildConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# Reads in buildConfig.xml.
# Note: only the bits that are currently used (e.g. by the incremental
# build code) are implemented.
# The resulting data is not a full representation of buildConfig.xml.
30
31package buildConfigxml;
32
33use strict;
34no strict 'refs';
35no strict 'subs';
36
37use XMLParser;
38
39
# Translates the metadata names used in buildConfig.xml (gs3 spelling) into
# the all-lowercase keys used in the gs2-style hash built by this module.
my $nameMap = {
    "numDocs"   => "numdocs",
    "buildType" => "buildtype",
};

# The hash that sub read_cfg_file fills in and returns.
my $data = {};

# Value of the "name" attribute whose element text is still to be stored;
# set by StartTag and consumed by Text.
my $currentLocation = "";

# Only names matching this pattern have their element text stored.
my $stringexp = '^(buildType|numDocs)$';

# Key of $data that index entries are appended to
# ("indexmap" or "indexfieldmap"), chosen when an indexList element starts.
my $indexmap_name = "";

# True when index names are also recorded under the "indexfields" key.
my $haveindexfields = 0;
55
# Reads a build configuration file, buildConfig.xml, into a hash laid out
# like the one used by gs2 (i.e. one read in by &cfgread::read_cfg_file).
# Only the parts handled by StartTag and Text are extracted: numdocs,
# buildtype and the index maps.
#
#   $filename - path of the file to read; must end in "buildConfig.xml".
#
# Returns undef when $filename does not end in "buildConfig.xml" or is not a
# plain file; otherwise returns a reference to the resulting hash (which is
# left empty when the file cannot be opened). Errors raised by the parser
# propagate to the caller.
sub read_cfg_file {
    my ($filename) = @_;

    # The handlers communicate through file-scoped variables, so everything
    # left over from an earlier parse has to be cleared; otherwise, e.g., an
    # mgpp file followed by an mg file would still collect "indexfields".
    $data = {};
    $currentLocation = "";
    $indexmap_name = "";
    $haveindexfields = 0;

    if (!defined $filename || $filename !~ /buildConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # Removed ProtocolEncoding (see MetadataXMLPlugin for details)

    # Create the XML::Parser object for parsing buildConfig.xml. With the
    # 'Stream' style the StartTag, EndTag and Text subs of the package named
    # by 'Pkg' are invoked; Char and Doctype are overridden explicitly.
    my $parser = XML::Parser->new('Style' => 'Stream',
                                  'Pkg' => 'buildConfigxml',
                                  'Handlers' => {'Char' => \&Char,
                                                 'Doctype' => \&Doctype
                                                });

    # Opening the file first turns an unreadable file into a diagnostic
    # instead of a parser exception. Three-argument open keeps characters in
    # the file name from being interpreted as an open mode.
    my $colcfg;
    if (!open($colcfg, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
    } else {
        close($colcfg);
        $parser->parsefile($filename);
    }

    #&Display;
    return $data;
}
86
# Start-tag handler for the XML::Parser 'Stream' style.
#
#   $expat   - the parser object (unused here)
#   $element - the name of the element being opened
#
# The element's attributes are read from %_ (which the Stream style fills in
# before calling this sub). Three cases are handled, and they are mutually
# exclusive for any one element, hence the if/elsif chain:
#   * a "name" attribute matching $stringexp: remember it so that Text can
#     store the element's text under the corresponding key;
#   * <indexList>: choose which map of $data the indexes go into;
#   * <index>: record "name->shortname" in that map.
sub StartTag {
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    # handling block metadataList
    if (defined $name and $name =~ /$stringexp/) {
        # the value will be retrieved later in Text sub
        $currentLocation = $name;
    }

    # handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene)
    elsif ($element eq 'indexList') {
        # set up the data arrays
        # this assumes that the build type has been read already, which is
        # currently the order we save the file in.
        my $buildtype = $data->{'buildtype'};
        if (defined $buildtype && $buildtype eq "mg") {
            $indexmap_name = "indexmap";
            # clear the flag explicitly so that an earlier mgpp/lucene
            # indexList cannot make mg indexes show up under "indexfields"
            $haveindexfields = 0;
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
        if (!defined $data->{$indexmap_name}) {
            $data->{$indexmap_name} = [];
        }
    }

    # Exact comparison: a substring test for "index" would also pick up
    # indexLanguage and indexSubcollection, which carry name and shortname
    # attributes too and would end up in the index map.
    elsif ($element eq 'index') {
        # store each index in the map (only once an indexList has said
        # which map to use)
        if ($indexmap_name ne "" && defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}
139
# End-tag handler for the XML::Parser 'Stream' style.
#
#   $expat   - the parser object (unused here)
#   $element - the name of the element being closed (unused here)
#
# Drops any location still waiting for its text. The Stream style delivers an
# element's text (if there is any) before calling this handler and does not
# call Text at all for an element without text, so for an empty element such
# as <metadata name="numDocs"></metadata> the pending location would
# otherwise survive and the next chunk of text (typically the whitespace
# between elements) would be stored as its value.
sub EndTag {
    my ($expat, $element) = @_;

    undef $currentLocation;
}
143
# Text handler for the XML::Parser 'Stream' style; the text is taken from $_.
# When a location matching $stringexp is pending (numDocs or buildType from
# the metadataList block), the text is stored in $data under the gs2 key
# given by $nameMap and the pending location is cleared.
sub Text {
    return unless defined $currentLocation;
    return unless $currentLocation =~ /$stringexp/;

    #print $currentLocation;
    my $gs2_key = $nameMap->{$currentLocation};
    $data->{$gs2_key} = $_;
    $currentLocation = undef;
}
155
# Debugging aid: prints whichever of the known entries of $data are defined,
# one "Label = value" line each. List entries are printed space-separated.
sub Display {

    # entries holding a plain value
    foreach my $entry (["NumDocs", 'numdocs'], ["BuildType", 'buildtype']) {
        my ($label, $key) = @$entry;
        next unless defined $data->{$key};
        print $label . " = " . $data->{$key} . "\n";
    }

    # entries holding a list
    foreach my $entry (["IndexMap", 'indexmap'],
                       ["IndexFieldMap", 'indexfieldmap'],
                       ["IndexFields", 'indexfields']) {
        my ($label, $key) = @$entry;
        next unless defined $data->{$key};
        print $label . " = " . join(" ", @{$data->{$key}}) . "\n";
    }

}
166
# Doctype handler registered with the parser in read_cfg_file: dies unless
# the document type name is "buildConfig".
# NOTE(review): the original author was unsure whether this is actually used.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    unless ($name =~ /^buildConfig$/) {
        die;
    }
}
173
# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
#
#   $_[0] - the parser object; the character data is appended to its
#           'Text' entry
#   $_[1] - the character data
#
# Always returns undef, so that the accumulated text is never the return
# value.
#
# An earlier revision had a "use bytes" inside an otherwise empty
# "if ($] < 5.008)" block here. Pragmas are lexically scoped to their
# enclosing block, so it never applied to the append below; it has been
# dropped because it had no effect.
sub Char {
    $_[0]->{'Text'} .= $_[1];
    return undef;
}
184
185
186
# Prints one line to $filehandle: the elements of the array referenced by
# $line, joined without any separator, followed by a newline.
sub write_line {
    my ($filehandle, $line) = @_;

    my $text = join("", @{$line});
    print {$filehandle} $text, "\n";
}
191
# Splits the entries of a map list, each of the form "name->shortname", into
# [name, shortname] pairs, keeping their order. Entries that are undefined or
# not of that form are skipped. Returns the list of pairs (empty for an
# undefined list).
sub _split_map_entries {
    my ($entries) = @_;

    my @pairs = ();
    return @pairs unless defined $entries;
    foreach my $entry (@$entries) {
        next unless defined $entry;
        my ($name, $shortname) = $entry =~ /^(.*)\-\>(.*)$/;
        next unless defined $name;
        push @pairs, [$name, $shortname];
    }
    return @pairs;
}

# Create the buildConfig.xml file for a specific collection.
#
# According to the original note, this sub is called in
# make_auxiliary_files() in basebuilder.pm.
#
#   $buildoutfile - destination file: buildConfig.xml
#   $buildcfg     - hash of build options; the keys read here are buildtype,
#                   numdocs, indexstem, infodbtype, indexmap/indexfieldmap,
#                   stemindexes, maxnumeric, levelmap, textlevel, searchtype,
#                   languagemap and subcollectionmap
#   $collectcfg   - hash holding the collection configuration; the keys read
#                   here are defaultindex, defaultlevel, defaultlanguage and
#                   classify
#
# Dies (after printing a message to STDERR) when the file cannot be opened
# for writing or cannot be closed cleanly. Returns 1 on success.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg) = @_;

    # Three-argument open on a lexical handle: characters in the file name
    # can no longer be taken for an open mode.
    my $colcfg;
    if (!open($colcfg, '>', $buildoutfile)) {
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die;
    }

    write_line($colcfg, ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);

    # output building metadata to build config file
    my $buildtype = "mgpp";
    if (defined $buildcfg->{"buildtype"}) {
        $buildtype = $buildcfg->{"buildtype"};
    }
    my $numdocs = "";
    if (defined $buildcfg->{"numdocs"}) {
        $numdocs = $buildcfg->{"numdocs"};
    }
    my $is_mg = ($buildtype eq "mg");
    my $has_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    write_line($colcfg, ["<metadataList>"]);
    write_line($colcfg, ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
    write_line($colcfg, ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
    if (defined $buildcfg->{'indexstem'}) {
        write_line($colcfg, ["<metadata name=\"indexStem\">", $buildcfg->{"indexstem"}, "</metadata>"]);
    }
    if (defined $buildcfg->{'infodbtype'}) {
        write_line($colcfg, ["<metadata name=\"infodbType\">", $buildcfg->{"infodbtype"}, "</metadata>"]);
    }
    write_line($colcfg, ["</metadataList>"]);

    my $service_type = "MGPP";
    if ($is_mg) {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # output serviceRackList
    write_line($colcfg, ["<serviceRackList>"]);

    # do the search service
    write_line($colcfg, ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
    if (defined $buildcfg->{'indexstem'}) {
        write_line($colcfg, ["<indexStem name=\"", $buildcfg->{'indexstem'}, "\" />"]);
    }
    if (defined $buildcfg->{'infodbtype'}) {
        write_line($colcfg, ["<databaseType name=\"", $buildcfg->{'infodbtype'}, "\" />"]);
    }

    # indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = $is_mg ? "indexmap" : "indexfieldmap";

    if (defined $buildcfg->{$maptype}) {
        foreach my $pair (_split_map_entries($buildcfg->{$maptype})) {
            my ($index_name, $index_short) = @$pair;
            # the first index listed is the default unless overridden below
            $defaultindex = $index_short if (!@indexlist);
            $indexmap->{$index_name} = $index_short;
            push @indexlist, $index_name;
        }
        # now if the user has assigned a default index, we use it - but only
        # when it is one of the indexes that were built; an unknown name used
        # to wipe out the default altogether
        my $wanted_index = $collectcfg->{"defaultindex"};
        if (defined $wanted_index && defined $indexmap->{$wanted_index}) {
            $defaultindex = $indexmap->{$wanted_index};
        }
    } else {
        print STDERR "$maptype not defined\n";
    }

    # for each index in indexList, write them out
    write_line($colcfg, ["<indexList>"]);
    foreach my $index_name (@indexlist) {
        write_line($colcfg, ["<index name=\"", $index_name, "\" ", "shortname=\"", $indexmap->{$index_name}, "\" />"]);
    }
    write_line($colcfg, ["</indexList>"]);

    #$defaultindex = "ZZ" if (!$defaultindex); # index allfields by default
    if ($defaultindex) {
        write_line($colcfg, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }

    # do indexOptionList
    if ($is_mg || $buildtype eq "mgpp") {
        write_line($colcfg, ["<indexOptionList>"]);

        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        write_line($colcfg, ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        write_line($colcfg, ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);

        write_line($colcfg, ["</indexOptionList>"]);
    }

    # levelList
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    if ($has_levels) {
        foreach my $pair (_split_map_entries($buildcfg->{'levelmap'})) {
            my ($level_name, $level_short) = @$pair;
            if (!@levellist) {
                # let default search level follow the first level in the level list
                $default_search_level = $level_short;
                # retrieve/database levels may get modified later if text level is defined
                $default_retrieve_level = $level_short;
                $default_db_level = $level_short;
            }
            $levelmap->{$level_name} = $level_short;
            push @levellist, $level_name;
        }

        # The default level assigned by the user is honoured for searching,
        # but the retrieve level stays the same. The override must only
        # happen when a known level was actually configured: done
        # unconditionally, it replaced the default with an undefined value
        # (written out as shortname="") whenever no default level was set.
        my $wanted_level = $collectcfg->{"defaultlevel"};
        if (defined $wanted_level && defined $levelmap->{$wanted_level}) {
            $default_search_level = $levelmap->{$wanted_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if (!$is_mg) {
        write_line($colcfg, ["<levelList>"]);
        foreach my $level_name (@levellist) {
            write_line($colcfg, ["<level name=\"", $level_name, "\" shortname=\"", $levelmap->{$level_name}, "\" />"]);
        }
        write_line($colcfg, ["</levelList>"]);
    }

    if ($has_levels) {
        # add in defaultLevel as the same level as indexLevelList, making the reading job easier
        write_line($colcfg, ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
        write_line($colcfg, ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);

        # do searchTypeList
        write_line($colcfg, ["<searchTypeList>"]);
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $searchtype (@{$buildcfg->{"searchtype"}}) {
                write_line($colcfg, ["<searchType name=\"", $searchtype, "\" />"]);
            }
        } else {
            write_line($colcfg, ["<searchType name=\"plain\" />"]);
            write_line($colcfg, ["<searchType name=\"form\" />"]);
        }
        write_line($colcfg, ["</searchTypeList>"]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        write_line($colcfg, ["<indexLanguageList>"]);

        my $lang_short_of = {};
        my $first_lang = 1;
        foreach my $pair (_split_map_entries($buildcfg->{"languagemap"})) {
            my ($lang_name, $lang_short) = @$pair;
            write_line($colcfg, ["<indexLanguage name=\"", $lang_name, "\" shortname=\"", $lang_short, "\" />"]);
            $lang_short_of->{$lang_name} = $lang_short;
            if ($first_lang) {
                $default_lang = $lang_name;
                $default_lang_short = $lang_short;
                $first_lang = 0;
            }
        }

        write_line($colcfg, ["</indexLanguageList>"]);

        # now if the user has assigned a default language (as "en", "ru" etc.)
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
            # keep the shortname in step with the name; it used to stay that
            # of the first language listed
            if (defined $lang_short_of->{$default_lang}) {
                $default_lang_short = $lang_short_of->{$default_lang};
            }
        }
        write_line($colcfg, ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared here because the retrieve service below uses it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($colcfg, ["<indexSubcollectionList>"]);

        my $subcolmap = {};
        my @subcollist = ();
        foreach my $pair (_split_map_entries($buildcfg->{'subcollectionmap'})) {
            my ($subcol_name, $subcol_short) = @$pair;
            $default_subcol = $subcol_short if (!@subcollist);
            $subcolmap->{$subcol_name} = $subcol_short;
            push @subcollist, $subcol_name;
        }
        foreach my $subcol_name (@subcollist) {
            write_line($colcfg, ["<indexSubcollection name=\"", $subcol_name, "\" shortname=\"", $subcolmap->{$subcol_name}, "\" />"]);
        }

        write_line($colcfg, ["</indexSubcollectionList>"]);
        write_line($colcfg, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }

    # close off search service
    write_line($colcfg, ["</serviceRack>"]);

    # do the retrieve service
    write_line($colcfg, ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        # the shortname attribute carries the language's shortname (the
        # language name used to be written here)
        write_line($colcfg, ["<defaultIndexLanguage shortname=\"", $default_lang_short, "\" />"]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($colcfg, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }
    if ($is_mg) {
        write_line($colcfg, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }

    if (defined $buildcfg->{'indexstem'}) {
        write_line($colcfg, ["<indexStem name=\"", $buildcfg->{'indexstem'}, "\" />"]);
    }
    if ($has_levels) {
        write_line($colcfg, ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
    }
    if (defined $buildcfg->{'infodbtype'}) {
        write_line($colcfg, ["<databaseType name=\"", $buildcfg->{'infodbtype'}, "\" />"]);
    }

    write_line($colcfg, ["</serviceRack>"]);

    # do the browse service
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    my $classifiers = $collectcfg->{"classify"};
    $classifiers = [] unless defined $classifiers;
    foreach my $cl (@$classifiers) {
        # every classifier, phind included, uses up a CL number
        my $name = "CL$count";
        $count++;

        my $classname = $cl->[0];
        $classname = "" unless defined $classname;
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        # the Browse rack is only opened once the first non-phind
        # classifier turns up
        if (not $started_classifiers) {
            write_line($colcfg, ["<serviceRack name=\"GS2Browse\">"]);
            if (defined $buildcfg->{'indexstem'}) {
                write_line($colcfg, ["<indexStem name=\"", $buildcfg->{'indexstem'}, "\" />"]);
            }
            if (defined $buildcfg->{'infodbtype'}) {
                write_line($colcfg, ["<databaseType name=\"", $buildcfg->{'infodbtype'}, "\" />"]);
            }
            write_line($colcfg, ["<classifierList>"]);
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            foreach my $j (0 .. $#{$cl}) {
                my $arg = $cl->[$j];
                next unless defined $arg;
                if ($arg eq "-buttonname") {
                    # a button name wins over any -metadata seen so far
                    $content = $cl->[$j+1];
                    last;
                } elsif ($arg eq "-metadata") {
                    $content = $cl->[$j+1];
                }
            }
            # an option given as the very last argument has no value
            $content = '' unless defined $content;
        }
        write_line($colcfg, ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
    }
    if ($started_classifiers) {
        # end the classifiers
        write_line($colcfg, ["</classifierList>"]);
        # close off the Browse service
        write_line($colcfg, ["</serviceRack>"]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        write_line($colcfg, ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
    }

    write_line($colcfg, ["</serviceRackList>"]);
    write_line($colcfg, ["</buildConfig>"]);

    # buffered write errors only surface when the handle is closed
    if (!close($colcfg)) {
        print STDERR "buildConfigxml::write_cfg_file couldn't finish writing the build config file $buildoutfile\n";
        die;
    }
    return 1;
}
536
537
538#########################################################
539
5401;
Note: See TracBrowser for help on using the browser.