Changeset 12910
- Timestamp: 2006-09-28T16:45:30+12:00 (18 years ago)
- Location: trunk/gsdl/perllib
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/lucenebuilder.pm
r12845 r12910 87 87 } 88 88 # /** new() **/ 89 90 # lucene has none of these options 91 sub generate_index_options { 92 my $self = shift (@_); 93 94 $self->{'casefold'} = 0; 95 $self->{'stem'} = 0; 96 $self->{'accentfold'} = 0; 97 $self->{'stemindexes'} = 0; 98 } 89 99 90 100 sub default_buildproc { -
trunk/gsdl/perllib/mgbuilder.pm
r12340 r12910 86 86 } 87 87 88 sub generate_index_options { 89 my $self = shift (@_); 90 91 $self->{'casefold'} = 0; 92 $self->{'stem'} = 0; 93 $self->{'accentfold'} = 0; #not yet implemented for mg 94 95 if (!defined($self->{'collect_cfg'}->{'indexoptions'})) { 96 # just use default options 97 $self->{'casefold'} = 1; 98 $self->{'stem'} = 1; 99 } else { 100 foreach my $option (@{$self->{'collect_cfg'}->{'indexoptions'}}) { 101 if ($option =~ /stem/) { 102 $self->{'stem'} = 1; 103 } elsif ($option =~ /casefold/) { 104 $self->{'casefold'} = 1; 105 } 106 } 107 } 108 109 # now we record this for the build cfg 110 $self->{'stemindexes'} = 0; 111 if ($self->{'casefold'}) { 112 $self->{'stemindexes'} += 1; 113 } 114 if ($self->{'stem'}) { 115 $self->{'stemindexes'} += 2; 116 } 117 118 119 } 88 120 89 121 sub compress_text { … … 456 488 die "mgbuilder::build_index - couldn't run $mg_stem_idx_exe\n"; 457 489 } 458 system ("mg_stem_idx$exe -b 4096 -s1 -f \"$fullindexprefix\" $osextra"); 459 system ("mg_stem_idx$exe -b 4096 -s2 -f \"$fullindexprefix\" $osextra"); 460 system ("mg_stem_idx$exe -b 4096 -s3 -f \"$fullindexprefix\" $osextra"); 461 490 # currently mg wont work if we don't generate all the stem idexes 491 # so we generate them whatever, but don't advertise the fact 492 #if ($self->{'casefold'}) { 493 print STDERR "casefolding\n"; 494 system ("mg_stem_idx$exe -b 4096 -s1 -f \"$fullindexprefix\" $osextra"); 495 #} 496 #if ($self->{'stem'}) { 497 print STDERR "stemming\n"; 498 system ("mg_stem_idx$exe -b 4096 -s2 -f \"$fullindexprefix\" $osextra"); 499 #} 500 #if ($self->{'casefold'} && $self->{'stem'}) { 501 print STDERR "casefold and stem\n"; 502 system ("mg_stem_idx$exe -b 4096 -s3 -f \"$fullindexprefix\" $osextra"); 503 #} 504 462 505 # remove unwanted files 463 506 my $tmpdir = &util::filename_cat ($self->{'build_dir'}, $indexdir); -
trunk/gsdl/perllib/mgppbuilder.pm
r12340 r12910 59 59 'ib2'=>1, 60 60 'ib3'=>1, 61 'ib4'=>1, 62 'ib5'=>1, 63 'ib6'=>1, 64 'ib7'=>1, 61 65 'i'=>1, 62 66 'il'=>1, … … 151 155 } 152 156 157 sub generate_index_options { 158 my $self = shift (@_); 159 160 $self->{'casefold'} = 0; 161 $self->{'stem'} = 0; 162 $self->{'accentfold'} = 0; 163 164 if (!defined($self->{'collect_cfg'}->{'indexoptions'})) { 165 # just use default options 166 $self->{'casefold'} = 1; 167 $self->{'stem'} = 1; 168 $self->{'accentfold'} = 1; 169 } else { 170 foreach my $option (@{$self->{'collect_cfg'}->{'indexoptions'}}) { 171 if ($option =~ /stem/) { 172 $self->{'stem'} = 1; 173 } elsif ($option =~ /casefold/) { 174 $self->{'casefold'} = 1; 175 } elsif ($option =~ /accentfold/) { 176 $self->{'accentfold'} = 1; 177 } 178 } 179 } 180 181 # now we record this for the build cfg 182 $self->{'stemindexes'} = 0; 183 if ($self->{'casefold'}) { 184 $self->{'stemindexes'} += 1; 185 } 186 if ($self->{'stem'}) { 187 $self->{'stemindexes'} += 2; 188 } 189 if ($self->{'accentfold'}) { 190 $self->{'stemindexes'} += 4; 191 } 192 print STDERR "temindexes = $self->{'stemindexes'}\n"; 193 } 194 153 195 sub default_buildproc { 154 196 my $self = shift (@_); … … 574 616 die "mgppbuilder::build_index - couldn't run $mgpp_stem_idx_exe\n"; 575 617 } 576 system ("mgpp_stem_idx$exe -b 4096 -s1 -f \"$fullindexprefix\" $osextra"); 577 system ("mgpp_stem_idx$exe -b 4096 -s2 -f \"$fullindexprefix\" $osextra"); 578 system ("mgpp_stem_idx$exe -b 4096 -s3 -f \"$fullindexprefix\" $osextra"); 579 618 my $accent_folding_enabled = 1; 619 if ($self->{'accentfold'}) { 620 # the first time we do this, we test for accent folding enabled 621 if (system ("mgpp_stem_idx$exe -b 4096 -s4 -f \"$fullindexprefix\" $osextra") != 0) { 622 # accent folding has not been enabled in mgpp 623 $accent_folding_enabled = 0; 624 $self->{'stemindexes'} -= 4; 625 } 626 } 627 if ($self->{'casefold'}) { 628 system ("mgpp_stem_idx$exe -b 4096 -s1 -f \"$fullindexprefix\" $osextra"); 629 if 
($accent_folding_enabled && $self->{'accentfold'}) { 630 system ("mgpp_stem_idx$exe -b 4096 -s5 -f \"$fullindexprefix\" $osextra"); 631 } 632 } 633 if ($self->{'stem'}) { 634 system ("mgpp_stem_idx$exe -b 4096 -s2 -f \"$fullindexprefix\" $osextra"); 635 if ($accent_folding_enabled && $self->{'accentfold'}) { 636 system ("mgpp_stem_idx$exe -b 4096 -s6 -f \"$fullindexprefix\" $osextra"); 637 } 638 } 639 if ($self->{'casefold'} && $self->{'stem'}) { 640 system ("mgpp_stem_idx$exe -b 4096 -s3 -f \"$fullindexprefix\" $osextra"); 641 if ($accent_folding_enabled && $self->{'accentfold'}) { 642 system ("mgpp_stem_idx$exe -b 4096 -s7 -f \"$fullindexprefix\" $osextra"); 643 } 644 } 645 580 646 # remove unwanted files 581 647 my $tmpdir = &util::filename_cat ($self->{'build_dir'}, $indexdir); … … 800 866 801 867 802 sub write_cfg_file {803 my $self = shift(@_);804 my ($build_cfg) = @_;805 806 # write out the build information807 &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg,808 '^(builddate|buildtype|numdocs|numsections|numbytes|textlevel|indexstem|maxnumeric)$',809 '^(indexmap|subcollectionmap|languagemap|indexfieldmap|notbuilt|indexfields|indexlevels|levelmap)$');810 811 }812 813 868 sub build_cfg_extra { 814 869 my $self = shift (@_);
Note: See TracChangeset for help on using the changeset viewer.