Changeset 4811
- Timestamp:
- 2003-06-25T15:57:49+12:00 (21 years ago)
- Location:
- trunk/gsdl/perllib
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/mgppbuilder.pm
r4794 r4811 48 48 $maxdocsize = 12000; 49 49 50 %level_map = ('document'=>'Doc', 51 'section'=>'Sec', 52 'paragraph'=>'Para', 53 'Doc'=>'_textdocument_', 54 'Sec'=>'_textsection_', 55 'Para'=>'_textparagraph_'); 56 57 #$doc_level = "Doc"; 58 #$sec_level = "Sec"; 59 #$para_level = "Para"; 50 60 51 61 %wanted_index_files = ('td'=>1, … … 64 74 # change this so a user can add their own ones in via a file or cfg 65 75 #add AND, OR, NOT NEAR to this list - these cannot be used as field names 76 #also add the level names (Doc, Sec, Para) 66 77 %static_indexfield_map = ('Title'=>'TI', 67 78 'TI'=>1, … … 89 100 'OR'=>1, 90 101 'NOT'=>1, 91 'NEAR'=>1); 102 'NEAR'=>1, 103 'Doc'=>1, 104 'Sec'=>1, 105 'Para'=>1); 92 106 93 107 sub new { … … 170 184 # get the levels (Section, Paragraph) for indexing and compression 171 185 $self->{'levels'} = {}; 186 $self->{'levelorder'} = (); 172 187 if (defined $self->{'collect_cfg'}->{'levels'}) { 173 188 foreach $level ( @{$self->{'collect_cfg'}->{'levels'}} ){ 189 $level =~ tr/A-Z/a-z/; 174 190 $self->{'levels'}->{$level} = 1; 191 push (@{$self->{'levelorder'}}, $level); 175 192 } 176 } 177 193 } else { # default to document 194 $self->{'levels'}->{'document'} = 1; 195 push (@{$self->{'levelorder'}}, 'document'); 196 } 197 198 $self->{'doc_level'} = "document"; 199 if (! $self->{'levels'}->{'document'}) { 200 if ($self->{'levels'}->{'section'}) { 201 $self->{'doc_level'} = "section"; 202 } else { 203 die "you must have either document or section level specified!!\n"; 204 } 205 } 206 print $outhandle "doclevel = ". $self->{'doc_level'}."\n"; 178 207 # get the list of plugins for this collection 179 208 my $plugins = []; … … 274 303 275 304 276 # define the section names for mgpasses305 # define the section names and possibly the doc name for mgpasses 277 306 # the compressor doesn't need to know about paragraphs - never want to 278 307 # retrieve them 279 308 my $mgpp_passes_sections = ""; 280 if ($self->{'levels'}->{'Section'}) { 281 $mgpp_passes_sections .= "-K Section "; 309 my ($doc_level) = $self->{'doc_level'}; 310 $mgpp_passes_sections .= "-J " . %level_map->{$doc_level}; 311 foreach $level (keys %{$self->{'levels'}}) { 312 if ($level ne $doc_level && $level ne "paragraph") { 313 $mgpp_passes_sections .= "-K " . %level_map->{$level}; 314 } 282 315 } 283 316 … … 543 576 544 577 # define the section names for mgpasses 578 # define the section names and possibly the doc name for mgpasses 545 579 my $mgpp_passes_sections = ""; 546 foreach $level (keys (%{$self->{'levels'}})) { 547 if ($level eq "Section" || $level eq "Paragraph") { 548 $mgpp_passes_sections .= "-K $level "; 580 my ($doc_level) = $self->{'doc_level'}; 581 $mgpp_passes_sections .= "-J " . %level_map->{$doc_level} ." "; 582 583 foreach $level (keys %{$self->{'levels'}}) { 584 if ($level ne $doc_level) { 585 $mgpp_passes_sections .= "-K " . %level_map->{$level}. " "; 549 586 } 550 587 } … … 773 810 } # foreach collmeta key 774 811 } 775 #add the index fieldmapmacros to [collection]812 #add the index field macros to [collection] 776 813 # eg <TI>Title 777 814 # <SU>Subject … … 779 816 $field_entry=""; 780 817 foreach $longfield (@{$self->{'build_cfg'}->{'indexfields'}}){ 781 print $outhandle "doing long field $longfield\n";782 818 $shortfield = $self->{'buildproc'}->{'indexfieldmap'}->{$longfield}; 783 819 next if $shortfield eq 1; … … 786 822 my $collmeta = ".$longfield"; 787 823 if ($collmetadefined && defined $self->{'collect_cfg'}->{'collectionmeta'}->{$collmeta}) { 788 print $outhandle "coll meta $collmeta defined\n";789 824 $metadata_entry = $self->create_language_db_map($collmeta, $shortfield); 790 825 $field_entry .= $metadata_entry; … … 801 836 print $handle $field_entry; 802 837 838 # now add the level names 839 $level_entry = ""; 840 foreach $level (@{$self->{'collect_cfg'}->{'levels'}}) { 841 my $collmeta = ".$level"; # based on the original specification 842 $level =~ tr/A-Z/a-z/; # make it lower case 843 my $levelid = %level_map->{$level}; # find the actual value we used in the index 844 if ($collmetadefined && defined $self->{'collect_cfg'}->{'collectionmeta'}->{$collmeta}) { 845 $metadata_entry = $self->create_language_db_map($collmeta, $levelid); 846 $level_entry .= $metadata_entry; 847 } else { 848 # use the default macro 849 $level_entry .= "<$levelid>" . %level_map->{$levelid} . "\n"; 850 } 851 } 852 print $handle $level_entry; 803 853 #end the collection entry 804 854 print $handle "\n" . ('-' x 70) . "\n"; … … 835 885 my $metadata_entry = ""; 836 886 my $default=""; 837 print $outhandle "crate for meta $metaname\n";838 887 #iterate through the languages 839 888 foreach $lang (keys (%{$self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}})) { 840 print $outhandle "lang=$lang\n";841 889 if ($first) { 842 890 $first=0; 843 891 #set the default default to the first entry 844 892 $default=$self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}->{$lang}; 845 print $outhandle "defualt = $default\n";846 893 } 847 894 if ($lang =~ /default/) { … … 983 1030 $build_cfg->{'buildtype'} = "mgpp"; #do we need this?? 984 1031 1032 # store the level info 1033 my @indexlevels = (); 1034 foreach $l (@{$self->{'levelorder'}}) { 1035 push (@indexlevels, %level_map->{$l}); 1036 } 1037 $build_cfg->{'indexlevels'} = \@indexlevels; 1038 1039 if ($self->{'levels'}->{'section'}) { 1040 $build_cfg->{'textlevel'} = %level_map->{'section'}; 1041 } else { 1042 $build_cfg->{'textlevel'} = %level_map->{'document'}; 1043 } 985 1044 # store the number of documents and number of bytes 986 1045 $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs(); … … 1012 1071 # write out the build information 1013 1072 &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg, 1014 '^(builddate|buildtype|numdocs|numbytes )$',1015 '^(indexmap|subcollectionmap|languagemap|indexfieldmap|notbuilt|indexfields )$');1073 '^(builddate|buildtype|numdocs|numbytes|textlevel)$', 1074 '^(indexmap|subcollectionmap|languagemap|indexfieldmap|notbuilt|indexfields|indexlevels)$'); 1016 1075 1017 1076 } -
trunk/gsdl/perllib/mgppbuildproc.pm
r4769 r4811 42 42 } 43 43 44 #this must be the same as in mgppbuilder 45 %level_map = ('document'=>'Doc', 46 'section'=>'Sec', 47 'paragraph'=>'Para'); 44 48 45 49 sub new { … … 298 302 #level not Section 299 303 my $docs_only = 1; 300 if ($self->{'levels'}->{' Section'}) {304 if ($self->{'levels'}->{'section'}) { 301 305 $docs_only = 0; 302 306 } … … 543 547 my ($documenttag) = ""; 544 548 my($documentendtag) = ""; 545 #if ($self->{'levels'}->{'Document'}) {546 $documenttag = "\n< Document>\n";547 $documentendtag = " </Document>\n";548 #}549 if ($self->{'levels'}->{'document'}) { 550 $documenttag = "\n<". %level_map->{'document'} . ">\n"; 551 $documentendtag = "\n</". %level_map->{'document'} . ">\n"; 552 } 549 553 my ($sectiontag) = ""; 550 if ($self->{'levels'}->{' Section'}) {551 $sectiontag = "\n< Section>\n";554 if ($self->{'levels'}->{'section'}) { 555 $sectiontag = "\n<". %level_map->{'section'} . ">\n"; 552 556 } 553 557 my ($paratag) = ""; 554 if ($self->{'levels'}->{' Paragraph'}) {558 if ($self->{'levels'}->{'paragraph'}) { 555 559 if ($self->{'strip_html'}) { 556 $paratag = "< Paragraph>";560 $paratag = "<". %level_map->{'paragraph'} . ">"; 557 561 } else { 558 562 print $outhandle "Paragraph level can not be used with no_strip_html!. Not indexing Paragraphs.\n"; 559 563 } 560 564 } 565 561 566 my $doc_section = 0; # just for this document 562 567
Note:
See TracChangeset
for help on using the changeset viewer.