Changeset 20099
- Timestamp:
- 2009-07-29T13:23:33+12:00 (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/collConfigxml.pm
r20096 r20099 112 112 my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/; 113 113 114 # Reads in the model collection configuration file, collectionConfig.xml, 115 # into a structure which complies with the one used by gs2 (i.e. one read 116 # in by &cfgread::read_cfg_file). 117 sub read_cfg_file { 118 my ($filename) = @_; 119 $data = {}; 120 if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) { 121 return undef; 122 } 123 124 # create XML::Parser object for parsing metadata.xml files 125 my $parser; 126 if ($]<5.008) { 127 # Perl 5.6 128 $parser = new XML::Parser('Style' => 'Stream', 129 'Handlers' => {'Char' => \&Char, 130 'Doctype' => \&Doctype 131 }); 132 } 133 else { 134 # Perl 5.8 135 $parser = new XML::Parser('Style' => 'Stream', 136 'ProtocolEncoding' => 'ISO-8859-1', 137 'Handlers' => {'Char' => \&Char, 138 'Doctype' => \&Doctype 139 }); 140 } 141 142 if (!open (COLCFG, $filename)) { 143 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n"; 144 } else { 145 146 $parser->parsefile ($filename);# (COLCFG); 147 close (COLCFG); 148 } 149 150 #&Display; 151 return $data; 152 } 153 114 154 sub StartTag { 115 155 # Those marked with #@ will not be executed at the same time when this sub is being called … … 129 169 my $filter = $_{'filter'}; 130 170 131 132 133 134 135 171 # for flax activities 172 my $desid = $_{'desid'}; 173 my $assigned = $_{'assigned'}; 174 my $lang = $_{'lang'}; 175 136 176 #@ Marking repeated block 137 177 if ($element =~ /$repeatedBlock/) { … … 410 450 } 411 451 412 # Reads in the model collection configuration file, collectionConfig.xml, 413 # into a structure which complies with the one used by gs2 (i.e. one read 414 # in by &cfgread::read_cfg_file). 
# Reads in the model collection configuration file (collectionConfig.xml or
# buildConfig.xml) into a structure which complies with the one used by gs2
# (i.e. one read in by &cfgread::read_cfg_file).
#
# Args:
#   $filename - path of the XML file; it must end in collectionConfig.xml or
#               buildConfig.xml and be a plain file.
# Returns:
#   the package-level $data hash reference, or undef when $filename is
#   missing, has the wrong name, or is not a plain file.
sub read_cfg_file {
    my ($filename) = @_;

    # $data is intentionally the package variable, not a lexical: it is reset
    # here and handed back after the parse.  Presumably the Stream-style
    # handlers defined elsewhere in this file (StartTag, Char, ...) fill it in
    # -- verify against those subs.
    $data = {};
    if (!defined $filename
        || ($filename !~ /collectionConfig\.xml$/ && $filename !~ /buildConfig\.xml$/)
        || !-f $filename) {
        return undef;
    }

    # create XML::Parser object for parsing the configuration file
    my %parser_args = (
        'Style'    => 'Stream',
        'Handlers' => { 'Char'    => \&Char,
                        'Doctype' => \&Doctype },
    );
    if ($] >= 5.008) {
        # ProtocolEncoding is only passed on Perl 5.8 and later (Perl 5.6
        # gets the parser without it, as before).
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # Probe readability with an explicit read mode so that a filename can never
    # be interpreted as an open() mode or a pipe.
    if (open(my $probe, '<', $filename)) {
        close($probe);
        $parser->parsefile($filename);
    }
    else {
        print STDERR "collConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
    }

    #&Display;
    return $data;
}


# Writes one line to $filehandle: the elements of the array reference $line
# concatenated without separator, followed by a newline.  Undefined elements
# are written as empty strings.  $filehandle may be a lexical handle or, as in
# older callers, the name of a package filehandle.
sub write_line {
    my ($filehandle, $line) = @_;
    print {$filehandle} join("", map { defined $_ ? $_ : "" } @$line), "\n";
}

# Writes the <indexStem> element to $fh when the build configuration defines
# an index stem; writes nothing otherwise.
sub _write_index_stem {
    my ($fh, $buildcfg) = @_;
    return unless defined $buildcfg->{'indexstem'};
    write_line($fh, ["<indexStem name=\"", $buildcfg->{'indexstem'}, "\" />"]);
    return;
}

# Splits a list of "name->shortname" strings into [name, shortname] pairs,
# keeping the original order.  Entries that are undefined or do not contain
# "->" are skipped so that they cannot end up as undefined hash keys.
sub _parse_map_entries {
    my ($entries) = @_;
    my @pairs;
    foreach my $entry (@{ $entries || [] }) {
        next unless defined $entry;
        my ($name, $shortname) = $entry =~ /^(.*)\-\>(.*)$/;
        next unless defined $name;
        push @pairs, [$name, $shortname];
    }
    return @pairs;
}

# Create the buildConfig.xml file for a specific collection.
#
# this sub is called in make_auxiliary_files() in basebuilder.pm
# Args:
#   $buildoutfile - destination file: buildConfig.xml
#   $buildcfg     - all build options (hash reference)
#   $collectcfg   - contents of collectionConfig.xml read in by read_cfg_file
#                   sub in collConfigxml.pm (hash reference)
#   $disable_OAI  - true to leave out the OAIPMH serviceRack; false or
#                   undefined to write it
# Dies when $buildoutfile cannot be opened for writing.
sub write_cfg_file {
    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_;

    my $out;
    if (!open($out, '>', $buildoutfile)) {
        my $reason = $!;
        print STDERR "collConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die "collConfigxml::write_cfg_file: $reason\n";
    }

    write_line($out, ["<buildConfig xmlns:gsf=\"http://www.greenstone.org/greenstone3/schema/ConfigFormat\">"]);

    # output building metadata to build config file
    my $buildtype = defined $buildcfg->{"buildtype"} ? $buildcfg->{"buildtype"} : "mgpp";
    my $numdocs   = defined $buildcfg->{"numdocs"}   ? $buildcfg->{"numdocs"}   : "";
    write_line($out, ["<metadataList>"]);
    write_line($out, ["<metadata name=\"numDocs\">", $numdocs, "</metadata>"]);
    write_line($out, ["<metadata name=\"buildType\">", $buildtype, "</metadata>"]);
    write_line($out, ["</metadataList>"]);

    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    }
    elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }
    # mgpp and lucene are the build types that get level and search type info
    my $has_levels = ($buildtype eq "mgpp" || $buildtype eq "lucene");

    # output serviceRackList
    write_line($out, ["<serviceRackList>"]);

    # This serviceRack enables the collection to provide the oai metadata
    # retrieve service.  It is written unless the disable_OAI flag is set; the
    # flag is tested for truth rather than compared with 0, so an undefined or
    # non-numeric value behaves sensibly.
    if (!$disable_OAI) {
        write_line($out, ["<serviceRack name=\"OAIPMH\">"]);
        _write_index_stem($out, $buildcfg);
        write_line($out, ["</serviceRack>"]);
    }

    # do the search service
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Search\">"]);
    _write_index_stem($out, $buildcfg);

    # indexes: %shortname_of_index maps index name to shortname, @indexlist
    # keeps the order in which the indexes were listed
    my %shortname_of_index;
    my @indexlist;
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";

    if (defined $buildcfg->{$maptype}) {
        foreach my $pair (_parse_map_entries($buildcfg->{$maptype})) {
            my ($name, $shortname) = @$pair;
            # the first index listed is the default one
            $defaultindex = $shortname unless @indexlist;
            $shortname_of_index{$name} = $shortname;
            push @indexlist, $name;
        }
        # now if the user has assigned a default index, we use it -- but only
        # when it names a known index, otherwise the first index stays default
        my $user_index = $collectcfg->{"defaultindex"};
        if (defined $user_index && defined $shortname_of_index{$user_index}) {
            $defaultindex = $shortname_of_index{$user_index};
        }
    }
    else {
        print STDERR "$maptype not defined\n";
    }

    # for each index in indexList, write them out
    write_line($out, ["<indexList>"]);
    foreach my $name (@indexlist) {
        write_line($out, ["<index name=\"", $name, "\" ", "shortname=\"", $shortname_of_index{$name}, "\" />"]);
    }
    write_line($out, ["</indexList>"]);

    write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        write_line($out, ["<indexOptionList>"]);

        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        write_line($out, ["<indexOption name=\"stemIndexes\" value=\"", $stemindexes, "\" />"]);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        write_line($out, ["<indexOption name=\"maxnumeric\" value=\"", $maxnumeric, "\" />"]);
        write_line($out, ["</indexOptionList>"]);
    }

    # levelList
    my %shortname_of_level;
    my @levellist;
    my $default_search_level   = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level       = "Doc";
    if ($has_levels) {
        foreach my $pair (_parse_map_entries($buildcfg->{'levelmap'})) {
            my ($name, $shortname) = @$pair;
            if (!@levellist) {
                # let default search level follow the first level in the level list
                $default_search_level = $shortname;
                # retrieve/database levels may get modified later if text level is defined
                $default_retrieve_level = $shortname;
                $default_db_level       = $shortname;
            }
            $shortname_of_level{$name} = $shortname;
            push @levellist, $name;
        }

        # The default level assigned by the user overrides the search level,
        # but the retrieve level stays the same.  It is applied only when it is
        # defined and names a known level; an unconditional lookup would wipe
        # out the defaults chosen above with an undefined value.
        my $user_level = $collectcfg->{"defaultlevel"};
        if (defined $user_level && defined $shortname_of_level{$user_level}) {
            $default_search_level = $shortname_of_level{$user_level};
        }

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level       = $buildcfg->{'textlevel'};
        }
    }

    # for each level in levelList, write them out
    if ($buildtype ne "mg") {
        write_line($out, ["<levelList>"]);
        foreach my $name (@levellist) {
            write_line($out, ["<level name=\"", $name, "\" shortname=\"", $shortname_of_level{$name}, "\" />"]);
        }
        write_line($out, ["</levelList>"]);
    }

    if ($has_levels) {
        # add in defaultLevel as the same level as indexLevelList, making the reading job easier
        write_line($out, ["<defaultLevel shortname=\"", $default_search_level, "\" />"]);
        write_line($out, ["<defaultDBLevel shortname=\"", $default_db_level, "\" />"]);

        # do searchTypeList
        write_line($out, ["<searchTypeList>"]);
        if (defined $buildcfg->{"searchtype"}) {
            foreach my $searchtype (@{ $buildcfg->{"searchtype"} }) {
                write_line($out, ["<searchType name=\"", $searchtype, "\" />"]);
            }
        }
        else {
            write_line($out, ["<searchType name=\"plain\" />"]);
            write_line($out, ["<searchType name=\"form\" />"]);
        }
        write_line($out, ["</searchTypeList>"]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    my $default_lang       = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        write_line($out, ["<indexLanguageList>"]);
        my $seen_language = 0;
        foreach my $pair (_parse_map_entries($buildcfg->{"languagemap"})) {
            my ($name, $shortname) = @$pair;
            write_line($out, ["<indexLanguage name=\"", $name, "\" shortname=\"", $shortname, "\" />"]);
            if (!$seen_language) {
                $default_lang       = $name;
                $default_lang_short = $shortname;
                $seen_language      = 1;
            }
        }
        write_line($out, ["</indexLanguageList>"]);

        # now if the user has assigned a default language (as "en", "ru" etc.)
        # NOTE(review): only the name is overridden; the shortname written
        # below is still that of the first language -- confirm this is intended.
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
        }
        write_line($out, ["<defaultIndexLanguage name=\"", $default_lang, "\" shortname=\"", $default_lang_short, "\" />"]);
    }

    # do indexSubcollectionList
    my $default_subcol = ""; # declared here because the retrieve service below needs it too
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<indexSubcollectionList>"]);
        my %shortname_of_subcol;
        my @subcollist;
        foreach my $pair (_parse_map_entries($buildcfg->{'subcollectionmap'})) {
            my ($name, $shortname) = @$pair;
            $default_subcol = $shortname unless @subcollist;
            $shortname_of_subcol{$name} = $shortname;
            push @subcollist, $name;
        }
        foreach my $name (@subcollist) {
            write_line($out, ["<indexSubcollection name=\"", $name, "\" shortname=\"", $shortname_of_subcol{$name}, "\" />"]);
        }
        write_line($out, ["</indexSubcollectionList>"]);
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }

    # close off search service
    write_line($out, ["</serviceRack>"]);

    # do the retrieve service
    write_line($out, ["<serviceRack name=\"GS2", $service_type, "Retrieve\">"]);

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        write_line($out, ["<defaultIndexLanguage shortname=\"", $default_lang, "\" />"]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        write_line($out, ["<defaultIndexSubcollection shortname=\"", $default_subcol, "\" />"]);
    }
    if ($buildtype eq "mg") {
        write_line($out, ["<defaultIndex shortname=\"", $defaultindex, "\" />"]);
    }
    _write_index_stem($out, $buildcfg);
    if ($has_levels) {
        write_line($out, ["<defaultLevel shortname=\"", $default_retrieve_level, "\" />"]);
    }
    write_line($out, ["</serviceRack>"]);

    # do the browse service
    my $count               = 1;
    my $phind               = 0;
    my $started_classifiers = 0;

    foreach my $cl (@{ $collectcfg->{"classify"} || [] }) {
        # every classifier, phind included, consumes a CLn number
        my $name = "CL$count";
        $count++;

        my $classname = defined $cl->[0] ? $cl->[0] : "";
        if ($classname =~ /^phind$/i) {
            $phind = 1;
            #should add it into coll config classifiers
            next;
        }

        if (!$started_classifiers) {
            write_line($out, ["<serviceRack name=\"GS2Browse\">"]);
            _write_index_stem($out, $buildcfg);
            write_line($out, ["<classifierList>"]);
            $started_classifiers = 1;
        }

        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        }
        else {
            foreach my $j (0 .. $#{$cl}) {
                my $arg = $cl->[$j];
                next unless defined $arg;
                if ($arg eq "-buttonname") {
                    $content = $cl->[$j + 1];
                    last;
                }
                elsif ($arg eq "-metadata") {
                    $content = $cl->[$j + 1];
                }
            }
            # an option given as the very last argument has no value
            $content = '' unless defined $content;
        }
        write_line($out, ["<classifier name=\"", $name, "\" content=\"", $content, "\" />"]);
    }
    if ($started_classifiers) {
        # end the classifiers
        write_line($out, ["</classifierList>"]);
        # close off the Browse service
        write_line($out, ["</serviceRack>"]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        write_line($out, ["<serviceRack name=\"PhindPhraseBrowse\" />"]);
    }

    foreach my $fa (@{ $collectcfg->{"flaxActivity"} || [] }) {
        next unless defined $fa;
        # Six elements of the array for three attribute name/value pairs: name,
        # desid, and lang.  The rack is written only when all six contain a
        # word character.
        my @attrs = @{$fa}[0 .. 5];
        next if grep { !defined $_ || $_ !~ /\w/ } @attrs;
        write_line($out, ["<serviceRack type=\"flaxActivity\" ", $attrs[0], "=\"", $attrs[1], "\" ", $attrs[2], "=\"", $attrs[3], "\" ", $attrs[4], "=\"", $attrs[5], "\" />"]);
    }

    write_line($out, ["</serviceRackList>"]);
    write_line($out, ["</buildConfig>"]);

    # buffered write errors only surface at close, so report them
    close($out)
        or print STDERR "collConfigxml::write_cfg_file couldn't finish writing the build config file $buildoutfile: $!\n";
}
Note:
See TracChangeset
for help on using the changeset viewer.