[14020] | 1 | ###########################################################################
|
---|
| 2 | #
|
---|
| 3 | # cfgread4gs3.pm --
|
---|
| 4 | # A component of the Greenstone digital library software
|
---|
| 5 | # from the New Zealand Digital Library Project at the
|
---|
| 6 | # University of Waikato, New Zealand.
|
---|
| 7 | #
|
---|
| 8 | # Copyright (C) 1999 New Zealand Digital Library Project
|
---|
| 9 | #
|
---|
| 10 | # This program is free software; you can redistribute it and/or modify
|
---|
| 11 | # it under the terms of the GNU General Public License as published by
|
---|
| 12 | # the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | # (at your option) any later version.
|
---|
| 14 | #
|
---|
| 15 | # This program is distributed in the hope that it will be useful,
|
---|
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | # GNU General Public License for more details.
|
---|
| 19 | #
|
---|
| 20 | # You should have received a copy of the GNU General Public License
|
---|
| 21 | # along with this program; if not, write to the Free Software
|
---|
| 22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | #
|
---|
| 24 | ###########################################################################
|
---|
| 25 |
|
---|
| 26 | # reads in configuration files of xml form
|
---|
| 27 |
|
---|
| 28 | package cfgread4gs3;
|
---|
| 29 | use strict;
|
---|
| 30 | no strict 'refs';
|
---|
[14101] | 31 | no strict 'subs';
|
---|
| 32 |
|
---|
| 33 | # Wrapper that ensures the right version of XML::Parser is loaded given
|
---|
| 34 | # the version of Perl being used. Need to distinguish between Perl 5.6 and
|
---|
| 35 | # Perl 5.8
|
---|
| 36 | sub BEGIN {
|
---|
| 37 | my $perl_dir;
|
---|
| 38 |
|
---|
| 39 | # Note: $] encodes the version number of perl
|
---|
| 40 | if ($]>5.008) {
|
---|
| 41 | # perl 5.8.1 or above
|
---|
| 42 | $perl_dir = "perl-5.8";
|
---|
| 43 | }
|
---|
| 44 | elsif ($]<5.008) {
|
---|
| 45 | # assume perl 5.6
|
---|
| 46 | $perl_dir = "perl-5.6";
|
---|
| 47 | }
|
---|
| 48 | else {
|
---|
| 49 | print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
|
---|
| 50 | print STDERR " Please upgrade to a newer version of Perl.\n";
|
---|
| 51 | $perl_dir = "perl-5.8";
|
---|
| 52 | }
|
---|
| 53 |
|
---|
| 54 | if ($ENV{'GSDLOS'} !~ /^windows$/i) {
|
---|
| 55 | # Use push to put this on the end, so an existing XML::Parser will be used by default
|
---|
| 56 | push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
|
---|
| 57 | }
|
---|
| 58 | }
|
---|
| 59 |
|
---|
| 60 | use XML::Parser;
|
---|
| 61 |
|
---|
[14020] | 62 | # A mapping hash to resolve name descrepency between gs2 and gs3.
|
---|
| 63 | my $nameMap = {"key" => "value",
|
---|
| 64 | "creator" => "creator",
|
---|
| 65 | "maintainer" => "maintainer",
|
---|
| 66 | "public" => "public",
|
---|
| 67 | "defaultIndex" => "defaultindex",
|
---|
| 68 | "defaultLevel" => "defaultlevel",
|
---|
| 69 | "name" => "collectionname",
|
---|
| 70 | "description" => "collectionextra",
|
---|
| 71 | "smallicon" => "iconcollectionsmall",
|
---|
| 72 | "icon" => "iconcollection",
|
---|
| 73 | "level" => "levels",
|
---|
| 74 | "classifier" => "classify",
|
---|
| 75 | "indexSubcollection" => "indexsubcollections",
|
---|
| 76 | "indexLanguage" => "languages",
|
---|
| 77 | "defaultIndexLanguage" => "defaultlanguage",
|
---|
| 78 | "index" => "indexes",
|
---|
| 79 | "plugin" => "plugin",
|
---|
| 80 | "indexOption" => "indexoptions",
|
---|
| 81 | "searchType" => "searchtype",
|
---|
| 82 | "languageMetadata" => "languagemetadata",
|
---|
| 83 | };
|
---|
| 84 | # A hash structure which is returned by sub read_cfg_file.
|
---|
| 85 | my $data = {};
|
---|
| 86 |
|
---|
| 87 | my $repeatedBlock = q/^(browse|pluginList)$/;
|
---|
| 88 |
|
---|
| 89 | # use those unique attribute values to locate the text within the elements
|
---|
| 90 | # creator, public, maintainer.
|
---|
| 91 | my $currentLocation = "";
|
---|
| 92 | my $stringexp = q/^(creator|maintainer|public)$/;
|
---|
| 93 |
|
---|
| 94 | my $currentLevel = "";
|
---|
| 95 |
|
---|
| 96 | # Count the elements with same name within the same block
|
---|
| 97 | # ("plugin", "option")
|
---|
| 98 | my $currentIndex = 0;
|
---|
| 99 | my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
|
---|
| 100 | my $arrayarrayexp= q/^(plugin|classifier)$/;
|
---|
| 101 |
|
---|
| 102 | my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
|
---|
| 103 |
|
---|
| 104 | sub StartTag {
|
---|
| 105 | # Those marked with #@ will not be executed at the same time when this sub is being called
|
---|
| 106 | # so that if/elsif is used to avoid unnecessary tests
|
---|
| 107 | my ($expat, $element) = @_;
|
---|
| 108 |
|
---|
| 109 | my $name = $_{'name'};
|
---|
| 110 | my $value = $_{'value'};
|
---|
| 111 | my $type = $_{'type'};
|
---|
| 112 |
|
---|
| 113 | # for subcollections
|
---|
| 114 | my $filter = $_{'filter'};
|
---|
| 115 |
|
---|
| 116 | #@ Marking repeated block
|
---|
| 117 | if ($element =~ /$repeatedBlock/) {
|
---|
| 118 | $currentIndex = 0;
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 | #@ handling block metadataList
|
---|
| 122 | elsif (defined $name and $name =~ /$stringexp/){
|
---|
| 123 | $currentLocation = $name;
|
---|
| 124 | }
|
---|
| 125 | #@ handling default search index/level/indexLanguage and languageMetadata
|
---|
| 126 | elsif ($element =~ /$defaults/) {
|
---|
| 127 | if (defined $name and $name =~ /\w/) {
|
---|
| 128 | $data->{$nameMap->{$element}} = $name;
|
---|
| 129 | }
|
---|
| 130 | }
|
---|
| 131 |
|
---|
| 132 | #@ Handling indexer: mgpp/mg/lucene; stringexp
|
---|
| 133 | elsif ($element eq "search") {
|
---|
| 134 | $data->{'buildtype'} = $type;
|
---|
| 135 | }
|
---|
| 136 |
|
---|
| 137 | #@ Handling searchtype: plain,form; arrayexp
|
---|
| 138 | #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
|
---|
| 139 | #@ Handling searchtype: plain, form
|
---|
| 140 | #$currentLocation = $name;
|
---|
| 141 | #}
|
---|
| 142 |
|
---|
| 143 |
|
---|
| 144 |
|
---|
| 145 | #@ Handle index|level|indexSubcollection|indexLanguage
|
---|
| 146 | elsif ($element =~ /$arrayexp/) {
|
---|
| 147 | my $key = $nameMap->{$element};
|
---|
| 148 | if (!defined $data->{$key}) {
|
---|
| 149 | $data->{$key} = [];
|
---|
| 150 | }
|
---|
| 151 |
|
---|
| 152 | push (@{$data->{$key}},$name);
|
---|
| 153 | }
|
---|
| 154 |
|
---|
| 155 | #@ indexoptions: accentfold/casefold/stem; arrayexp
|
---|
| 156 | elsif ($element eq "indexOption") {
|
---|
| 157 | $currentLevel = "indexOption";
|
---|
| 158 | }
|
---|
| 159 | if ($currentLevel eq "indexOption" and $element eq "option") {
|
---|
| 160 | my $key = $nameMap->{$currentLevel};
|
---|
| 161 | if (!defined $data->{$key}) {
|
---|
| 162 | $data->{$key} = [];
|
---|
| 163 | }
|
---|
| 164 | push (@{$data->{$key}},$name);
|
---|
| 165 | }
|
---|
| 166 |
|
---|
| 167 | #@ use hash of hash of strings: hashexp
|
---|
| 168 | elsif ($element eq "subcollection") {
|
---|
| 169 | if (!defined $data->{'subcollection'}) {
|
---|
| 170 | $data->{'subcollection'} = {};
|
---|
| 171 | }
|
---|
| 172 | if (defined $name and $name =~ /\w/) {
|
---|
| 173 | if (defined $filter and $filter =~ /\w/) {
|
---|
| 174 | $data->{'subcollection'}->{$name} = $filter;
|
---|
| 175 |
|
---|
| 176 | }
|
---|
| 177 | }
|
---|
| 178 | }
|
---|
| 179 |
|
---|
| 180 | #@ Handling each classifier/plugin element
|
---|
| 181 | elsif ($element =~ /$arrayarrayexp/) {
|
---|
| 182 | # find the gs2 mapping name
|
---|
| 183 | $currentLevel = $element;
|
---|
| 184 | my $key = $nameMap->{$element};
|
---|
| 185 |
|
---|
| 186 | # define an array of array of strings foreach $k (@{$data->{$key}}) {
|
---|
| 187 | if (!defined $data->{$key}) {
|
---|
| 188 | $data->{$key} = [];
|
---|
| 189 | }
|
---|
| 190 | # Push classifier/plugin name (e.g. AZList) into $data as the first string
|
---|
| 191 | push (@{$data->{$key}->[$currentIndex]},$name);
|
---|
| 192 | #print $currentIndex."indexup\n";
|
---|
| 193 | }
|
---|
| 194 |
|
---|
| 195 | #@ Handling the option elements in each classifier/plugin element (as the following strings)
|
---|
| 196 | elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
|
---|
| 197 | # find the gs2 mapping name for classifier and plugin
|
---|
| 198 | my $key = $nameMap->{$currentLevel};
|
---|
| 199 |
|
---|
| 200 | if (defined $name and $name =~ /\w/) {
|
---|
| 201 | push (@{$data->{$key}->[$currentIndex]}, $name);
|
---|
| 202 | }
|
---|
| 203 | if (defined $value and $value =~ /\w/) {
|
---|
| 204 | push (@{$data->{$key}->[$currentIndex]}, $value);
|
---|
| 205 | }
|
---|
| 206 |
|
---|
| 207 | }
|
---|
| 208 |
|
---|
| 209 | }
|
---|
| 210 |
|
---|
| 211 | sub EndTag {
|
---|
| 212 | my ($expat, $element) = @_;
|
---|
| 213 | my $endTags = q/^(browse|pluginList)$/;
|
---|
| 214 | if ($element =~ /$endTags/) {
|
---|
| 215 | $currentIndex = 0;
|
---|
| 216 | $currentLevel = "";
|
---|
| 217 | }
|
---|
| 218 | # $arrayarrayexp contains classifier|plugin
|
---|
| 219 | elsif($element =~ /$arrayarrayexp/){
|
---|
| 220 | $currentIndex = $currentIndex + 1;
|
---|
| 221 | }
|
---|
| 222 |
|
---|
| 223 | }
|
---|
| 224 |
|
---|
| 225 | sub Text {
|
---|
| 226 | #@ Handling block metadataList(creator, maintainer, public)
|
---|
| 227 | if (defined $currentLocation and $currentLocation =~ /$stringexp/){
|
---|
| 228 | #print $currentLocation;
|
---|
| 229 | my $key = $nameMap->{$currentLocation};
|
---|
| 230 | $data->{$key} = $_;
|
---|
| 231 | undef $currentLocation;
|
---|
| 232 | }
|
---|
| 233 | #@ Handling searchtype: plain,form; arrayexp
|
---|
| 234 | if (defined $currentLocation and $currentLocation =~ /searchType/) {
|
---|
| 235 | # map 'searchType' into 'searchtype'
|
---|
| 236 | my $key = $nameMap->{$currentLocation};
|
---|
| 237 | # split it by ','
|
---|
| 238 | my ($plain, $form) = split (",", $_);
|
---|
| 239 |
|
---|
| 240 | if (!defined $data->{$key}) {
|
---|
| 241 | $data->{$key} = [];
|
---|
| 242 | }
|
---|
| 243 | if (defined $plain and $plain =~ /\w/) {
|
---|
| 244 | push @{ $data->{$key} }, $plain;
|
---|
| 245 | }
|
---|
| 246 | if (defined $form and $form =~ /\w/) {
|
---|
| 247 | push @{ $data->{$key} }, $form;
|
---|
| 248 | }
|
---|
| 249 | }
|
---|
| 250 | }
|
---|
| 251 | # This sub is for debugging purposes
|
---|
| 252 | sub Display {
|
---|
| 253 | # metadataList
|
---|
| 254 |
|
---|
| 255 | print $data->{'creator'}."\n" if (defined $data->{'creator'});
|
---|
| 256 | print $data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
|
---|
| 257 | print $data->{"public"}."\n" if (defined $data->{"public"});
|
---|
| 258 | print $data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
|
---|
| 259 | print $data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
|
---|
| 260 | print $data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
|
---|
| 261 | print join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
|
---|
| 262 | print join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
|
---|
| 263 | print join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
|
---|
| 264 | print join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
|
---|
| 265 | print join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
|
---|
| 266 | print join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
|
---|
| 267 | print join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
|
---|
| 268 |
|
---|
| 269 | if (defined $data->{'plugin'}) {
|
---|
| 270 | foreach $a (@{$data->{'plugin'}}) {
|
---|
| 271 | print join(",",@$a);
|
---|
| 272 | print "\n";
|
---|
| 273 | }
|
---|
| 274 | }
|
---|
| 275 | if (defined $data->{'classify'}) {
|
---|
| 276 | print "Classifiers: \n";
|
---|
| 277 | map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
|
---|
| 278 | }
|
---|
| 279 |
|
---|
| 280 | if (defined $data->{'subcollection'}) {
|
---|
| 281 | foreach my $key (keys %{$data->{'subcollection'}}) {
|
---|
| 282 | print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
|
---|
| 283 | }
|
---|
| 284 | }
|
---|
| 285 | }
|
---|
| 286 | sub Doctype {
|
---|
| 287 | my ($expat, $name, $sysid, $pubid, $internal) = @_;
|
---|
| 288 |
|
---|
| 289 | # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files
|
---|
| 290 | # to be processed as well as the "DirectoryMetadata" files which should now
|
---|
| 291 | # be created by import.pl
|
---|
| 292 | die if ($name !~ /^(Greenstone)?DirectoryMetadata$/);
|
---|
| 293 | }
|
---|
| 294 |
|
---|
| 295 | # This Char function overrides the one in XML::Parser::Stream to overcome a
|
---|
| 296 | # problem where $expat->{Text} is treated as the return value, slowing
|
---|
| 297 | # things down significantly in some cases.
|
---|
| 298 | sub Char {
|
---|
| 299 | if ($]<5.008) {
|
---|
| 300 | use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
|
---|
| 301 | }
|
---|
| 302 | $_[0]->{'Text'} .= $_[1];
|
---|
| 303 | return undef;
|
---|
| 304 | }
|
---|
| 305 | # Reads in the model collection configuration file, collectionConfig.xml,
|
---|
| 306 | # into a structure which complies with the one used by gs2 (i.e. one read
|
---|
| 307 | # in by &cfgread::read_cfg_file).
|
---|
| 308 | sub read_cfg_file {
|
---|
| 309 | my ($filename) = @_;
|
---|
| 310 | $data = {};
|
---|
| 311 | if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
|
---|
| 312 | return undef;
|
---|
| 313 | }
|
---|
| 314 |
|
---|
| 315 | # create XML::Parser object for parsing metadata.xml files
|
---|
| 316 | my $parser;
|
---|
| 317 | if ($]<5.008) {
|
---|
| 318 | # Perl 5.6
|
---|
| 319 | $parser = new XML::Parser('Style' => 'Stream',
|
---|
| 320 | 'Handlers' => {'Char' => \&Char,
|
---|
| 321 | 'Doctype' => \&Doctype
|
---|
| 322 | });
|
---|
| 323 | }
|
---|
| 324 | else {
|
---|
| 325 | # Perl 5.8
|
---|
| 326 | $parser = new XML::Parser('Style' => 'Stream',
|
---|
| 327 | 'ProtocolEncoding' => 'ISO-8859-1',
|
---|
| 328 | 'Handlers' => {'Char' => \&Char,
|
---|
| 329 | 'Doctype' => \&Doctype
|
---|
| 330 | });
|
---|
| 331 | }
|
---|
| 332 |
|
---|
| 333 | if (!open (COLCFG, $filename)) {
|
---|
| 334 | print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
|
---|
| 335 | } else {
|
---|
| 336 |
|
---|
| 337 | $parser->parsefile ($filename);# (COLCFG);
|
---|
| 338 | close (COLCFG);
|
---|
| 339 | }
|
---|
| 340 |
|
---|
| 341 | #&Display; print "***********";
|
---|
| 342 | return $data;
|
---|
| 343 | }
|
---|
| 344 |
|
---|
| 345 |
|
---|
| 346 | sub write_line {
|
---|
| 347 | my ($filehandle, $line) = @_;
|
---|
| 348 | print $filehandle join ("", @$line), "\n";
|
---|
| 349 | }
|
---|
| 350 |
|
---|
[14101] | 351 | # Create the buildConfig.xml file for a specific collection
|
---|
[14020] | 352 | sub write_cfg_file {
|
---|
[14101] | 353 | # information needed from $collectcfg include: defaultindex, defaultlevel, classifiers,
|
---|
[14020] | 354 | # the rest is from $buildcfg
|
---|
| 355 | my ($buildoutfile, $buildcfg, $collectcfg) = @_;
|
---|
| 356 | my $line = [];
|
---|
| 357 |
|
---|
| 358 | if (!open (COLCFG, ">$buildoutfile")) {
|
---|
| 359 | print STDERR "cfgread::write_cfg_file couldn't write the cfg file $buildoutfile\n";
|
---|
| 360 | die;
|
---|
| 361 | }
|
---|
| 362 |
|
---|
| 363 | &write_line('COLCFG', ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);
|
---|
| 364 |
|
---|
| 365 | # output building metadata to build config file
|
---|
| 366 | my $buildtype;
|
---|
| 367 | if (defined $buildcfg->{"buildtype"}) {
|
---|
| 368 | $buildtype = $buildcfg->{"buildtype"};
|
---|
| 369 | } else {
|
---|
| 370 | $buildtype = "mgpp";
|
---|
| 371 | }
|
---|
| 372 | my $numdocs;
|
---|
| 373 | if (defined $buildcfg->{"numdocs"}) {
|
---|
| 374 | $numdocs = $buildcfg->{"numdocs"};
|
---|
| 375 | }
|
---|
| 376 | &write_line('COLCFG', ["<metadataList>"]);
|
---|
| 377 | &write_line('COLCFG', ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
|
---|
| 378 | &write_line('COLCFG', ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
|
---|
| 379 | &write_line('COLCFG', ["</metadataList>"]);
|
---|
| 380 |
|
---|
| 381 | my $service_type = "MGPP";
|
---|
| 382 | if ($buildtype eq "mg") {
|
---|
| 383 | $service_type = "MG";
|
---|
| 384 | } elsif ($buildtype eq "lucene") {
|
---|
| 385 | $service_type = "Lucene";
|
---|
| 386 | }
|
---|
| 387 |
|
---|
| 388 | # output serviceRackList
|
---|
| 389 | &write_line('COLCFG', ["<serviceRackList>"]);
|
---|
| 390 |
|
---|
[14101] | 391 | # This serviceRack enables the collection to provide the oai metadata retrieve service, which is served by the OAIMetadataRetrieve.java class
|
---|
| 392 | # For each collection, we write the following serviceRack in the collection's buildConfig.xml file as follows and also specify the metadata format this oai service provides in the rack. But whether this service is going to be put in use depends on its name appearing in the OAIConfig.xml.
|
---|
| 393 | &write_line('COLCFG', ["<serviceRack name=\"OAIPMH\">"]);
|
---|
| 394 |
|
---|
| 395 | # What metadata sets to support is collection specific and is specified in each collection's buildConfig.xml file. To support other metadata schema, simply add an OAIMetadataFormat element here.
|
---|
| 396 | # The support of unqualified Dublin Core metadata set is mendatory in the oai specification.
|
---|
| 397 | &write_line('COLCFG', ["<OAIMetadataFormat name=\"Dublin Core\">"]);
|
---|
| 398 | &write_line('COLCFG', ["<metadataPrefix>oai_dc</metadataPrefix>"]);
|
---|
| 399 | &write_line('COLCFG', ["<metadataNamespace>dc</metadataNamespace>"]);
|
---|
| 400 | &write_line('COLCFG', ["</OAIMetadataFormat>"]);
|
---|
| 401 |
|
---|
| 402 | &write_line('COLCFG', ["</serviceRack>"]);
|
---|
| 403 |
|
---|
[14020] | 404 | # do the search service
|
---|
| 405 | &write_line('COLCFG', ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
|
---|
| 406 | if (defined $buildcfg->{'indexstem'}) {
|
---|
| 407 | my $indexstem = $buildcfg->{'indexstem'};
|
---|
| 408 | &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
|
---|
| 409 | }
|
---|
| 410 |
|
---|
| 411 | #indexes
|
---|
| 412 | # maps index name to shortname
|
---|
| 413 | my $indexmap = {};
|
---|
| 414 | # keeps the order for indexes
|
---|
| 415 | my @indexlist = ();
|
---|
| 416 |
|
---|
| 417 | my $defaultindex = "";
|
---|
| 418 | my $first = 1;
|
---|
| 419 | my $maptype = "indexfieldmap";
|
---|
| 420 | if ($buildtype eq "mg") {
|
---|
| 421 | $maptype = "indexmap";
|
---|
| 422 | }
|
---|
| 423 | if (defined $buildcfg->{$maptype}) {
|
---|
| 424 | my $indexmap_t = $buildcfg->{$maptype};
|
---|
| 425 | foreach my $i (@$indexmap_t) {
|
---|
| 426 | my ($k, $v) = $i =~ /^(.*)\-\>(.*)$/;
|
---|
| 427 | $indexmap->{$k} = $v;
|
---|
| 428 | push @indexlist, $k;
|
---|
| 429 | if ($first) {
|
---|
| 430 | $defaultindex = $v;
|
---|
| 431 | $first = 0;
|
---|
| 432 | }
|
---|
| 433 | }
|
---|
| 434 | # now if the user has assigned a default index, we use it
|
---|
| 435 | if (defined $collectcfg->{"defaultindex"}) {
|
---|
| 436 | $defaultindex = $indexmap->{$collectcfg->{"defaultindex"}};
|
---|
| 437 | }
|
---|
| 438 |
|
---|
| 439 | } else {
|
---|
| 440 | print STDERR "$maptype not defined";
|
---|
| 441 | }
|
---|
| 442 | #for each index in indexList, write them out
|
---|
| 443 | &write_line('COLCFG', ["<indexList>"]);
|
---|
| 444 | foreach my $i (@indexlist) {
|
---|
| 445 | my $index = $indexmap->{$i};
|
---|
| 446 | &write_line('COLCFG', ["<index name=\"", $i, "\" ", "shortname=\"", $index, "\" />"]);
|
---|
| 447 | }
|
---|
| 448 | &write_line('COLCFG', ["</indexList>"]);
|
---|
| 449 |
|
---|
| 450 | # do default index only for mg
|
---|
| 451 | if ($buildtype eq "mg") {
|
---|
| 452 | &write_line('COLCFG', ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
|
---|
| 453 | }
|
---|
| 454 |
|
---|
| 455 | # do indexOptionList
|
---|
| 456 | if ($buildtype eq "mg" || $buildtype eq "mgpp") {
|
---|
| 457 | &write_line('COLCFG', ["<indexOptionList>"]);
|
---|
| 458 | my $stemindexes = 3; # default is stem and casefold
|
---|
| 459 | if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
|
---|
| 460 | $stemindexes = $buildcfg->{'stemindexes'};
|
---|
| 461 | }
|
---|
| 462 | &write_line('COLCFG', ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);
|
---|
| 463 |
|
---|
| 464 | my $maxnumeric = 4; # default
|
---|
| 465 | if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
|
---|
| 466 | $maxnumeric = $buildcfg->{'maxnumeric'};
|
---|
| 467 | }
|
---|
| 468 | &write_line('COLCFG', ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);
|
---|
| 469 | &write_line('COLCFG', ["</indexOptionList>"]);
|
---|
| 470 | }
|
---|
| 471 |
|
---|
| 472 | # levelList
|
---|
| 473 | my $levelmap = {};
|
---|
| 474 | my @levellist = ();
|
---|
| 475 | my $default_search_level = "";
|
---|
| 476 | my $default_retrieve_level = "Doc";#this is defaultGDBMLevel (also for the retrieve service)
|
---|
| 477 | $first = 1;
|
---|
| 478 | if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
|
---|
| 479 | if (defined $buildcfg->{'levelmap'}) {
|
---|
| 480 | my $levelmap_t = $buildcfg->{'levelmap'};
|
---|
| 481 | foreach my $l (@$levelmap_t) {
|
---|
| 482 | my ($key, $val) = $l =~ /^(.*)\-\>(.*)$/;
|
---|
| 483 | $levelmap->{$key} = $val;
|
---|
| 484 | push @levellist, $key;
|
---|
| 485 | if ($first) {
|
---|
| 486 | $default_search_level = $val;
|
---|
| 487 | $first = 0;
|
---|
| 488 | }
|
---|
| 489 | }
|
---|
| 490 | }
|
---|
| 491 | # now if the user has assigned a default level, we use it
|
---|
| 492 | if (defined $collectcfg->{"defaultlevel"}) {
|
---|
[14101] | 493 | $default_search_level = $levelmap->{$collectcfg->{"defaultlevel"}};
|
---|
[14020] | 494 | $default_retrieve_level = $default_search_level;
|
---|
| 495 | }
|
---|
| 496 | #if (defined $buildcfg->{'textlevel'}) {
|
---|
| 497 | # $default_retrieve_level = $buildcfg->{'textlevel'};
|
---|
| 498 | #}
|
---|
| 499 | }
|
---|
[14101] | 500 | #for each level in levelList, write them out
|
---|
[14020] | 501 | if ($buildtype ne "mg") {
|
---|
| 502 | &write_line('COLCFG', ["<levelList>"]);
|
---|
| 503 | foreach my $lv (@levellist) {
|
---|
| 504 | my $level = $levelmap->{$lv};
|
---|
| 505 | &write_line('COLCFG', ["<level name=\"", $lv, "\" shortname=\"", $level, "\" />"]);
|
---|
| 506 | }
|
---|
[14101] | 507 | &write_line('COLCFG', ["</levelList>"]);
|
---|
[14020] | 508 | }
|
---|
| 509 | # add in defaultLevel as the same level as indexLevelList, making the reading job easier
|
---|
| 510 | if ($buildtype eq "lucene" || $buildtype eq "mgpp") {
|
---|
| 511 | &write_line('COLCFG', ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
|
---|
| 512 | }
|
---|
| 513 | &write_line('COLCFG', ["<defaultGDBMLevel shortname=\"", $default_retrieve_level, "\" />"]);
|
---|
| 514 |
|
---|
| 515 | # do searchTypeList
|
---|
| 516 | if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
|
---|
| 517 | &write_line('COLCFG', ["<searchTypeList>"]);
|
---|
| 518 |
|
---|
| 519 | if (defined $buildcfg->{"searchtype"}) {
|
---|
| 520 | my $searchtype_t = $buildcfg->{"searchtype"};
|
---|
| 521 | foreach my $s (@$searchtype_t) {
|
---|
| 522 | &write_line('COLCFG', ["<searchType name=\"", $s, "\" />"]);
|
---|
| 523 | }
|
---|
| 524 | } else {
|
---|
| 525 | &write_line('COLCFG', ["<searchType name=\"plain\" />"]);
|
---|
| 526 | &write_line('COLCFG', ["<searchType name=\"form\" />"]);
|
---|
| 527 | }
|
---|
| 528 | &write_line('COLCFG', ["</searchTypeList>"]);
|
---|
| 529 | }
|
---|
| 530 |
|
---|
| 531 | # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
|
---|
| 532 | $first = 1;
|
---|
| 533 | my $default_lang = "";
|
---|
| 534 | my $default_lang_short = "";
|
---|
| 535 | if (defined $buildcfg->{"languagemap"}) {
|
---|
| 536 | &write_line('COLCFG', ["<indexLanguageList>"]);
|
---|
| 537 |
|
---|
| 538 | my $langmap_t = $buildcfg->{"languagemap"};
|
---|
| 539 | foreach my $l (@$langmap_t) {
|
---|
| 540 | my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
|
---|
| 541 |
|
---|
| 542 | &write_line('COLCFG', ["<indexLanguage name=\"", $k, "\" shortname=\"", $v, "\" />"]);
|
---|
| 543 | if ($first) {
|
---|
| 544 | $default_lang = $k; #name
|
---|
| 545 | $default_lang_short = $v; #shortname
|
---|
| 546 | $first = 0;
|
---|
| 547 | }
|
---|
| 548 | }
|
---|
| 549 |
|
---|
| 550 | &write_line('COLCFG', ["</indexLanguageList>"]);
|
---|
| 551 | # now if the user has assigned a default language (as "en", "ru" etc.)
|
---|
| 552 | if (defined $collectcfg->{"defaultlanguage"}) {
|
---|
| 553 | $default_lang = $collectcfg->{"defaultlanguage"};
|
---|
| 554 | }
|
---|
| 555 | &write_line('COLCFG', ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
|
---|
| 556 | }
|
---|
| 557 |
|
---|
| 558 |
|
---|
| 559 | # do indexSubcollectionList
|
---|
| 560 | my $default_subcol = "";# make it in sub scope to be used in the concatenation
|
---|
| 561 | if (defined $buildcfg->{'subcollectionmap'}) {
|
---|
| 562 | &write_line('COLCFG', ["<indexSubcollectionList>"]);
|
---|
| 563 | my $subcolmap = {};
|
---|
| 564 | my @subcollist = ();
|
---|
| 565 | $first = 1;
|
---|
| 566 | my $subcolmap_t = $buildcfg->{'subcollectionmap'};
|
---|
| 567 | foreach my $l (@$subcolmap_t) {
|
---|
| 568 | my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
|
---|
| 569 | $subcolmap->{$k} = $v;
|
---|
| 570 | push @subcollist, $k;
|
---|
| 571 | if ($first) {
|
---|
| 572 | $default_subcol = $v;
|
---|
| 573 | $first = 0;
|
---|
| 574 | }
|
---|
| 575 | }
|
---|
| 576 | foreach my $sl (@subcollist) {
|
---|
| 577 | my $subcol = $subcolmap->{$sl};
|
---|
| 578 | &write_line('COLCFG', ["<indexSubcollection name=\"", $sl, "\" shortname=\"", $subcol, "\" />"]);
|
---|
| 579 | }
|
---|
| 580 |
|
---|
| 581 | &write_line('COLCFG', ["</indexSubcollectionList>"]);
|
---|
| 582 | &write_line('COLCFG', ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
|
---|
| 583 | }
|
---|
| 584 |
|
---|
| 585 | # close off search service
|
---|
| 586 | &write_line('COLCFG', ["</serviceRack>"]);
|
---|
| 587 |
|
---|
| 588 | # do the retrieve service
|
---|
| 589 | &write_line('COLCFG', ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);
|
---|
| 590 |
|
---|
| 591 | # do default index
|
---|
| 592 | if (defined $buildcfg->{"languagemap"}) {
|
---|
| 593 | &write_line('COLCFG', ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
|
---|
| 594 | }
|
---|
| 595 | if (defined $buildcfg->{'subcollectionmap'}) {
|
---|
| 596 | &write_line('COLCFG', ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
|
---|
| 597 | }
|
---|
| 598 | if ($buildtype eq "mg") {
|
---|
| 599 | &write_line('COLCFG', ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
|
---|
| 600 | }
|
---|
| 601 |
|
---|
| 602 | if (defined $buildcfg->{'indexstem'}) {
|
---|
| 603 | my $indexstem = $buildcfg->{'indexstem'};
|
---|
| 604 | &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
|
---|
| 605 | }
|
---|
| 606 | if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
|
---|
| 607 | &write_line('COLCFG', ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
|
---|
| 608 | }
|
---|
| 609 | &write_line('COLCFG', ["</serviceRack>"]);
|
---|
| 610 |
|
---|
| 611 | # do the browse service
|
---|
| 612 | my $count = 1;
|
---|
| 613 | my $phind = 0;
|
---|
| 614 | my $started_classifiers = 0;
|
---|
| 615 |
|
---|
| 616 | my $classifiers = $collectcfg->{"classify"};
|
---|
| 617 | foreach my $cl (@$classifiers) {
|
---|
| 618 | my $name = "CL$count";
|
---|
| 619 | $count++;
|
---|
| 620 | my ($classname) = @$cl[0];
|
---|
| 621 | if ($classname =~ /^phind$/i) {
|
---|
| 622 | $phind=1;
|
---|
| 623 | #should add it into coll config classifiers
|
---|
| 624 | next;
|
---|
| 625 | }
|
---|
| 626 |
|
---|
| 627 | if (not $started_classifiers) {
|
---|
| 628 | &write_line('COLCFG', ["<serviceRack name=\"GS2Browse\">"]);
|
---|
| 629 | if (defined $buildcfg->{'indexstem'}) {
|
---|
| 630 | my $indexstem = $buildcfg->{'indexstem'};
|
---|
| 631 | &write_line('COLCFG', ["<indexStem name=\"", $indexstem, "\" />"]);
|
---|
| 632 | }
|
---|
| 633 | &write_line('COLCFG', ["<classifierList>"]);
|
---|
| 634 | $started_classifiers = 1;
|
---|
| 635 | }
|
---|
| 636 | my $content = ''; #use buttonname first, then metadata
|
---|
| 637 | if ($classname eq "DateList") {
|
---|
| 638 | $content = "Date";
|
---|
| 639 | } else {
|
---|
| 640 | for (my $j=0; $j<scalar(@$cl); $j++) {
|
---|
| 641 | my $arg = @$cl[$j];
|
---|
| 642 | if ($arg eq "-buttonname"){
|
---|
| 643 | $content = @$cl[$j+1];
|
---|
| 644 | last;
|
---|
| 645 | } elsif ($arg eq "-metadata") {
|
---|
| 646 | $content = @$cl[$j+1];
|
---|
| 647 | }
|
---|
| 648 |
|
---|
| 649 | }
|
---|
| 650 | }
|
---|
| 651 | &write_line('COLCFG', ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
|
---|
| 652 | }
|
---|
| 653 | if ($started_classifiers) {
|
---|
| 654 | # end the classifiers
|
---|
| 655 | &write_line('COLCFG', ["</classifierList>"]);
|
---|
| 656 | # close off the Browse service
|
---|
| 657 | &write_line('COLCFG', ["</serviceRack>"]);
|
---|
| 658 | }
|
---|
| 659 |
|
---|
| 660 | # the phind classifier is a separate service
|
---|
| 661 | if ($phind) {
|
---|
| 662 | # if phind classifier
|
---|
| 663 | &write_line('COLCFG', ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
|
---|
| 664 | }
|
---|
| 665 |
|
---|
| 666 | &write_line('COLCFG', ["</serviceRackList>"]);
|
---|
| 667 | &write_line('COLCFG', ["</buildConfig>"]);
|
---|
| 668 |
|
---|
| 669 | close (COLCFG);
|
---|
| 670 | }
|
---|
| 671 |
|
---|
| 672 |
|
---|
| 673 | #########################################################
|
---|
| 674 |
|
---|
| 675 | 1;
|
---|