Changeset 4794 for trunk/gsdl/perllib
- Timestamp:
- 2003-06-25T11:11:52+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/mgppbuilder.pm
r4768 r4794 35 35 36 36 BEGIN { 37 # set autoflush on for STDERR and STDOUT so that mg 37 # set autoflush on for STDERR and STDOUT so that mgpp 38 38 # doesn't get out of sync with plugins 39 39 STDOUT->autoflush(1); … … 82 82 'People'=>'PE', 83 83 'PE'=>1, 84 ' AllFields'=>'ZZ',84 'allfields'=>'ZZ', 85 85 'ZZ'=>1, 86 ' TextOnly'=>'TX',86 'text'=>'TX', 87 87 'TX'=>1, 88 88 'AND'=>1, … … 381 381 382 382 # create the mapping between the index descriptions 383 # and their directory names 383 # and their directory names (includes subcolls and langs) 384 384 $self->{'index_mapping'} = $self->create_index_mapping ($indexes); 385 385 … … 440 440 # store the mapping orders as well as the maps 441 441 # also put index, subcollection and language fields into the mapping thing - 442 # (the full index name (eg document:text:subcol:lang) is not used on442 # (the full index name (eg text:subcol:lang) is not used on 443 443 # the query page) -these are used for collectionmeta later on 444 444 if (!defined $mapping{'indexmap'}{"$fields"}) { … … 621 621 } 622 622 623 # set up the document process or623 # set up the document processr 624 624 $self->{'buildproc'}->set_output_handle ($handle); 625 625 $self->{'buildproc'}->set_mode ('text'); … … 686 686 system ("mgpp_stem_idx$exe -b 4096 -s3 -f \"$fullindexprefix\" $osextra"); 687 687 688 688 #define the final field lists 689 $self->make_final_field_list(); 690 689 691 # remove unwanted files 690 692 my $tmpdir = &util::filename_cat ($self->{'build_dir'}, $indexdir); … … 701 703 } 702 704 closedir (DIR); 703 }705 } 704 706 } 705 707 … … 725 727 726 728 # define the indexed field mapping if not already done so (ie if infodb called separately from build_index) 727 if (scalar(keys %{$self->{'buildproc'}->{'indexfieldmap'}}) == 0) { 728 #check build.cfg to see if indexfields have been filled in 729 $buildconfigfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "building/build.cfg"); 730 if (-e $buildconfigfile) { 731 $buildcfg = &colcfg::read_build_cfg( $buildconfigfile); 732 if (defined $buildcfg->{'indexfields'}) { 733 foreach $field (@{$buildcfg->{'indexfields'}}) { 734 $self->{'buildproc'}->{'indexfields'}->{$field} = 1; 735 } 736 } 737 if (defined $buildcfg->{'indexfieldmap'}) { 738 foreach $field (@{$buildcfg->{'indexfieldmap'}}) { 739 ($f, $v) = $field =~ /^(.*)\-\>(.*)$/; 740 $self->{'buildproc'}->{'indexfieldmap'}->{$f} = $v; 741 } 742 } 743 } 744 } 745 729 if (!defined $self->{'build_cfg'}) { 730 $self->read_final_field_list(); 731 } 746 732 print $outhandle "\n*** creating the info database and processing associated files\n" 747 733 if ($self->{'verbosity'} >= 1); … … 772 758 $self->{'buildproc'}->reset(); 773 759 760 # do the collection info 761 print $handle "[collection]\n"; 762 763 # first do the collection meta stuff - everything without a dot 764 my $collmetadefined = 0; 774 765 if (defined $self->{'collect_cfg'}->{'collectionmeta'}) { 775 776 if (!defined $self->{'index_mapping'}) { 777 $self->{'index_mapping'} = 778 $self->create_index_mapping ($self->{'collect_cfg'}->{'indexes'}); 779 } 780 781 print $handle "[collection]\n"; 782 766 $collmetadefined = 1; 783 767 foreach $cmeta (keys (%{$self->{'collect_cfg'}->{'collectionmeta'}})) { 784 my $defaultfound=0; 785 my $first=1; 786 my $metadata_entry = ""; 787 my $default=""; 788 my $cmetamap = ""; 789 if ($cmeta =~ s/^\.//) { 790 if (defined $self->{'index_mapping'}->{$cmeta}) { 791 $cmetamap = $self->{'index_mapping'}->{$cmeta}; 792 $cmeta = ".$cmeta"; 793 } 794 else { 795 print $outhandle "mgbuilder: warning bad collectionmeta option '$cmeta' - ignored\n"; 796 next; #ignore this one 797 } 798 } 799 else { 800 $cmetamap = $cmeta; # just using the same name 801 } 802 #iterate through the languages 803 foreach $lang (keys (%{$self->{'collect_cfg'}->{'collectionmeta'}->{$cmeta}})) { 804 if ($first) { 805 $first=0; 806 #set the default default to the first entry 807 $default=$self->{'collect_cfg'}->{'collectionmeta'}->{$cmeta}->{$lang}; 808 } 809 if ($lang =~ /default/) { 810 $defaultfound=1; 811 #the default entry goes first 812 $metadata_entry = "<$cmetamap>" . 813 $self->{'collect_cfg'}->{'collectionmeta'}->{$cmeta}->{'default'} . "\n" . $metadata_entry; 814 } 815 else { 816 my ($l) = $lang =~ /^\[l=(\w*)\]$/; 817 if ($l) { 818 $metadata_entry .= "<$cmetamap:$l>" . 819 $self->{'collect_cfg'}->{'collectionmeta'}->{$cmeta}->{$lang} . "\n"; 820 } 821 } 822 } 823 #if we haven't found a default, put one in 824 if (!$defaultfound) { 825 $metadata_entry = "<$cmetamap>$default\n" . $metadata_entry; 826 } 768 next if ($cmeta =~ /^\./); # for now, ignore ones with dots 769 my ($metadata_entry) = $self->create_language_db_map($cmeta, $cmeta); 827 770 #write the entry to the file 828 771 print $handle $metadata_entry; 829 772 830 } 831 832 #add the indexfieldmap macros to [collection] 833 # eg <TI>Title 834 # <SU>Subject 835 # these may be overidden for other langs if add to macro files 836 $field_entry=""; 837 foreach $longfield (keys %{$self->{'buildproc'}->{'indexfieldmap'}}){ 838 $shortfield = $self->{'buildproc'}->{'indexfieldmap'}->{$longfield}; 839 next if $shortfield eq 1; 840 $field_entry .= "<$shortfield>$longfield\n"; 841 } 842 print $handle $field_entry; 773 } # foreach collmeta key 774 } 775 #add the indexfieldmap macros to [collection] 776 # eg <TI>Title 777 # <SU>Subject 778 # these now come from collection meta. if that is not defined, usses the metadata name 779 $field_entry=""; 780 foreach $longfield (@{$self->{'build_cfg'}->{'indexfields'}}){ 781 print $outhandle "doing long field $longfield\n"; 782 $shortfield = $self->{'buildproc'}->{'indexfieldmap'}->{$longfield}; 783 next if $shortfield eq 1; 843 784 844 print $handle "\n" . ('-' x 70) . "\n"; 845 846 } 785 # we need to check if some coll meta has been defined 786 my $collmeta = ".$longfield"; 787 if ($collmetadefined && defined $self->{'collect_cfg'}->{'collectionmeta'}->{$collmeta}) { 788 print $outhandle "coll meta $collmeta defined\n"; 789 $metadata_entry = $self->create_language_db_map($collmeta, $shortfield); 790 $field_entry .= $metadata_entry; 791 } else { #use the metadata names, or the text macros for allfields and textonly 792 if ($longfield eq "allfields") { 793 $field_entry .= "<$shortfield>_query:textallfields_\n"; 794 } elsif ($longfield eq "text") { 795 $field_entry .= "<$shortfield>_query:texttextonly_\n"; 796 } else { 797 $field_entry .= "<$shortfield>$longfield\n"; 798 } 799 } 800 } 801 print $handle $field_entry; 802 803 #end the collection entry 804 print $handle "\n" . ('-' x 70) . "\n"; 805 806 847 807 848 808 &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'}, … … 867 827 } 868 828 829 sub create_language_db_map { 830 my $self = shift (@_); 831 my ($metaname, $mapname) = @_; 832 my $outhandle = $self->{'outhandle'}; 833 my $defaultfound=0; 834 my $first=1; 835 my $metadata_entry = ""; 836 my $default=""; 837 print $outhandle "crate for meta $metaname\n"; 838 #iterate through the languages 839 foreach $lang (keys (%{$self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}})) { 840 print $outhandle "lang=$lang\n"; 841 if ($first) { 842 $first=0; 843 #set the default default to the first entry 844 $default=$self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}->{$lang}; 845 print $outhandle "defualt = $default\n"; 846 } 847 if ($lang =~ /default/) { 848 $defaultfound=1; 849 #the default entry goes first 850 $metadata_entry = "<$mapname>" . 851 $self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}->{'default'} . "\n" . $metadata_entry; 852 } 853 else { 854 my ($l) = $lang =~ /^\[l=(\w*)\]$/; 855 if ($l) { 856 $metadata_entry .= "<$mapname:$l>" . 857 $self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}->{$lang} . "\n"; 858 } 859 } 860 } #foreach lang 861 #if we haven't found a default, put one in 862 if (!$defaultfound) { 863 $metadata_entry = "<$mapname>$default\n" . $metadata_entry; 864 } 865 return $metadata_entry; 866 867 } 869 868 sub collect_specific { 870 869 my $self = shift (@_); 871 870 } 872 871 873 sub make_auxiliary_files { 874 my $self = shift (@_); 875 my ($index); 876 my %build_cfg = (); 877 878 my $outhandle = $self->{'outhandle'}; 879 print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1); 880 881 # get the text directory 882 &util::mk_all_dir ($self->{'build_dir'}); 883 884 # store the build date 885 $build_cfg->{'builddate'} = time; 886 $build_cfg->{'buildtype'} = "mgpp"; 887 888 # store the number of documents and number of bytes 889 $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs(); 890 $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes(); 891 892 # store the mapping between the index names and the directory names 893 my @indexmap = (); 894 foreach $index (@{$self->{'index_mapping'}->{'indexmaporder'}}) { 895 push (@indexmap, "$index\-\>$self->{'index_mapping'}->{'indexmap'}->{$index}"); 896 } 897 $build_cfg->{'indexmap'} = \@indexmap; 898 899 my @subcollectionmap = (); 900 foreach $subcollection (@{$self->{'index_mapping'}->{'subcollectionmaporder'}}) { 901 push (@subcollectionmap, "$subcollection\-\>" . 902 $self->{'index_mapping'}->{'subcollectionmap'}->{$subcollection}); 903 } 904 $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap); 905 906 my @languagemap = (); 907 foreach $language (@{$self->{'index_mapping'}->{'languagemaporder'}}) { 908 push (@languagemap, "$language\-\>" . 909 $self->{'index_mapping'}->{'languagemap'}->{$language}); 910 } 911 $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap); 912 913 $build_cfg->{'notbuilt'} = $self->{'notbuilt'}; 872 # at the end of building, we have an indexfieldmap with all teh mappings, plus 873 # some extras, and indexmap with any indexes in it that weren't specified in the index definition. 874 # we want to make an ordered list of fields that are indexed, and a list of mappings that are used. this will be used for the build.cfg file, and for collection meta definition 875 # we store these in a build.cfg bit 876 sub make_final_field_list { 877 my $self = shift (@_); 878 879 $self->{'build_cfg'} = {}; 914 880 915 881 # store the indexfieldmap information … … 938 904 939 905 } elsif ($field eq 'text') { 940 push (@indexfieldmap, " TextOnly\-\>TX");941 push (@indexfields, " TextOnly");906 push (@indexfieldmap, "text\-\>TX"); 907 push (@indexfields, "text"); 942 908 } elsif ($field eq 'allfields') { 943 push (@indexfieldmap, " AllFields\-\>ZZ");944 push (@indexfields, " AllFields");909 push (@indexfieldmap, "allfields\-\>ZZ"); 910 push (@indexfields, "allfields"); 945 911 } else { 946 912 push (@indexfieldmap, "$field\-\>$self->{'buildproc'}->{'indexfieldmap'}->{$field}"); … … 948 914 949 915 } 950 #if (defined $self->{'buildproc'}->{'indexfields'}->{'TextOnly'}) { 951 #push (@indexfieldmap, "TextOnly\-\>TX"); 952 #} 953 #foreach $field (sort keys %{$self->{'buildproc'}->{'indexfields'}}) { 954 #next if $field eq "TextOnly"; 955 #push (@indexfieldmap, "$field\-\>$self->{'buildproc'}->{'indexfieldmap'}->{$field}"); 956 } 957 958 $build_cfg->{'indexfieldmap'} = \@indexfieldmap; 959 $build_cfg->{'indexfields'} = \@indexfields; 960 961 #store the indexed field information 962 #foreach $field (sort keys %{$self->{'buildproc'}->{'indexfields'}}) { 963 964 #push (@{$build_cfg->{'indexfields'}}, $field); 965 #} 916 } 917 $self->{'build_cfg'}->{'indexfieldmap'} = \@indexfieldmap; 918 $self->{'build_cfg'}->{'indexfields'} = \@indexfields; 919 920 921 } 922 923 924 # recreate the field list from the build.cfg file, look first in building, then in index to find it. if there is no build.cfg, we cant do the field list (there is unlikely to be any index anyway.) 925 sub read_final_field_list { 926 my $self = shift (@_); 927 $self->{'build_cfg'} = {}; 928 my @indexfieldmap = (); 929 my @indexfields = (); 930 931 if (scalar(keys %{$self->{'buildproc'}->{'indexfieldmap'}}) == 0) { 932 # set the default mapping 933 $self->{'buildproc'}->set_indexfieldmap ($self->{'indexfieldmap'}); 934 } 935 # we read the stuff in from the build.cfg file - if its there 936 $buildconfigfile = &util::filename_cat($self->{'build_dir'}, "build.cfg"); 937 938 if (!-e $buildconfigfile) { 939 # try the index dir - but do we know where it is?? try here 940 $buildconfigfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "index", "build.cfg"); 941 if (!-e $buildconfigfile) { 942 #we cant find a config file - just ignore the field list 943 return; 944 } 945 } 946 $buildcfg = &colcfg::read_build_cfg( $buildconfigfile); 947 if (defined $buildcfg->{'indexfields'}) { 948 foreach $field (@{$buildcfg->{'indexfields'}}) { 949 push (@indexfields, "$field"); 950 } 951 } 952 if (defined $buildcfg->{'indexfieldmap'}) { 953 foreach $field (@{$buildcfg->{'indexfieldmap'}}) { 954 push (@indexfieldmap, "$field"); 955 ($f, $v) = $field =~ /^(.*)\-\>(.*)$/; 956 $self->{'buildproc'}->{'indexfieldmap'}->{$f} = $v; 957 } 958 } 959 960 $self->{'build_cfg'}->{'indexfieldmap'} = \@indexfieldmap; 961 $self->{'build_cfg'}->{'indexfields'} = \@indexfields; 962 963 } 964 sub make_auxiliary_files { 965 my $self = shift (@_); 966 my ($index); 967 968 my $build_cfg = {}; 969 # this already includes indexfieldmap and indexfields 970 if (defined $self->{'build_cfg'}) { 971 $build_cfg = $self->{'build_cfg'}; 972 } 973 #my %build_cfg = (); 974 975 my $outhandle = $self->{'outhandle'}; 976 print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1); 977 978 # get the text directory 979 &util::mk_all_dir ($self->{'build_dir'}); 980 981 # store the build date 982 $build_cfg->{'builddate'} = time; 983 $build_cfg->{'buildtype'} = "mgpp"; #do we need this?? 984 985 # store the number of documents and number of bytes 986 $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs(); 987 $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes(); 988 989 # store the mapping between the index names and the directory names 990 my @indexmap = (); 991 foreach $index (@{$self->{'index_mapping'}->{'indexmaporder'}}) { 992 push (@indexmap, "$index\-\>$self->{'index_mapping'}->{'indexmap'}->{$index}"); 993 } 994 $build_cfg->{'indexmap'} = \@indexmap; 995 996 my @subcollectionmap = (); 997 foreach $subcollection (@{$self->{'index_mapping'}->{'subcollectionmaporder'}}) { 998 push (@subcollectionmap, "$subcollection\-\>" . 999 $self->{'index_mapping'}->{'subcollectionmap'}->{$subcollection}); 1000 } 1001 $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap); 1002 1003 my @languagemap = (); 1004 foreach $language (@{$self->{'index_mapping'}->{'languagemaporder'}}) { 1005 push (@languagemap, "$language\-\>" . 1006 $self->{'index_mapping'}->{'languagemap'}->{$language}); 1007 } 1008 $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap); 1009 1010 $build_cfg->{'notbuilt'} = $self->{'notbuilt'}; 1011 966 1012 # write out the build information 967 1013 &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg, 968 1014 '^(builddate|buildtype|numdocs|numbytes)$', 969 1015 '^(indexmap|subcollectionmap|languagemap|indexfieldmap|notbuilt|indexfields)$'); 970 1016 971 1017 } 972 1018
Note:
See TracChangeset
for help on using the changeset viewer.