Ignore:
Timestamp:
2007-01-12T14:18:53+13:00 (17 years ago)
Author:
kjdon
Message:

mgpp and lucene: made them always use doc and sec levels for the text, regardless of the index level specification. mgpp will always index at doc and sec level, but these options may not be presented to the user. This ensures that if we have sectioned documents, we don't need to turn on section indexing in order for the document display to use sections.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/mgppbuilder.pm

    r13341 r13590  
    4646          'Sec'=>'_textsection_',
    4747          'Para'=>'_textparagraph_');
    48 
    49 #$doc_level = "Doc";
    50 #$sec_level = "Sec";
    51 #$para_level = "Para";
    5248
    5349our %wanted_index_files = ('td'=>1,
     
    127123    }
    128124   
    129     $self->{'doc_level'} = "document";
    130     if (! $self->{'levels'}->{'document'}) {
    131     if ($self->{'levels'}->{'section'}) {
    132         $self->{'doc_level'} = "section";
    133     } else {
    134         die "you must have either document or section level specified!!\n";
    135     }
    136     }
    137 
    138125    $self->{'buildtype'} = "mgpp";
    139126
     
    229216    # the compressor doesn't need to know about paragraphs - never want to
    230217    # retrieve them
    231     my $mgpp_passes_sections = "";
    232     my ($doc_level) = $self->{'doc_level'};
    233     $mgpp_passes_sections .= "-J " . $level_map{$doc_level} . " ";
    234     foreach my $level (keys %{$self->{'levels'}}) {
    235     if ($level ne $doc_level && $level ne "paragraph") {
    236         $mgpp_passes_sections .= "-K " . $level_map{$level} . " ";
    237     }
    238     }
     218   
     219    # always use Doc and Sec levels
     220    my $mgpp_passes_sections = "-J ". $level_map{"document"} ." -K " . $level_map{"section"} ." ";
    239221
    240222    print $outhandle "\n*** creating the compressed text\n" if ($self->{'verbosity'} >= 1);
     
    250232    $handle = STDOUT;
    251233    } else {
    252     #print $outhandle "trying to run (compress 1) mgpp_passes$exe $mgpp_passes_sections -f \"$fulltextprefix\" -T1 $osextra\n";
    253234    if (!-e "$mgpp_passes_exe" ||
    254235        !open (PIPEOUT, "| mgpp_passes$exe  -M $maxnumeric $mgpp_passes_sections -f \"$fulltextprefix\" -T1 $osextra")) {
     
    258239    $handle = mgppbuilder::PIPEOUT;
    259240    }
    260 
    261     # gdbm_level
    262     my $gdbm_level = "document";
    263     if ($self->{'levels'}->{'section'}) {
    264     $gdbm_level = "section";
    265     }
    266    
     241   
     242    my $gdbm_level = "section";
     243
    267244    $self->{'buildproc'}->set_output_handle ($handle);
    268245    $self->{'buildproc'}->set_mode ('text');
     
    300277
    301278    if (!$self->{'debug'}) {
    302         #print $outhandle "trying to run (compress 2) mgpp_passes$exe $mgpp_passes_sections -f \"$fulltextprefix\" -T2 $osextra\n";
    303279        if (!-e "$mgpp_passes_exe" ||
    304280        !open ($handle, "| mgpp_passes$exe  -M $maxnumeric $mgpp_passes_sections -f \"$fulltextprefix\" -T2 $osextra")) {
     
    351327    foreach my $index (@$indexes) {
    352328    my ($fields, $subcollection, $languages) = split (":", $index);
    353     # the directory name starts with a processed version of index fields
    354     #my ($pindex) = $self->process_field($fields);
    355     #$pindex = lc ($pindex);
    356     # now we only ever have one index, and its called 'idx'
     329   
     330    # we only ever have one index, and its called 'idx'
    357331    my $pindex = 'idx';
    358332   
     
    441415
    442416    # define the section names for mgpasses
    443     # define the section names and possibly the doc name for mgpasses
    444     my $mgpp_passes_sections = "";
    445     my ($doc_level) = $self->{'doc_level'};
    446     $mgpp_passes_sections .= "-J " . $level_map{$doc_level} ." ";
    447    
    448     foreach my $level (keys %{$self->{'levels'}}) {
    449     if ($level ne $doc_level) {
    450         $mgpp_passes_sections .= "-K " . $level_map{$level}. " ";
    451     }
     417    my $mgpp_passes_sections = "-J ". $level_map{"document"} ." -K " . $level_map{"section"} ." ";
     418    if ($self->{'levels'}->{'paragraph'}) {
     419    $mgpp_passes_sections .= "-K " . $level_map{'paragraph'}. " ";
    452420    }
    453421
     
    526494    }
    527495       
    528     # gdbm_level
    529     my $gdbm_level = "document";
    530     if ($self->{'levels'}->{'section'}) {
    531     $gdbm_level = "section";
    532     }
     496    # gdbm_level is always section
     497    my $gdbm_level = "section";
    533498
    534499    # set up the document processr
     
    878843    $build_cfg->{'levelmap'} = \@levelmap;
    879844
    880     if ($self->{'levels'}->{'section'}) {
    881     $build_cfg->{'textlevel'} = $level_map{'section'};
    882     } else {   
    883     $build_cfg->{'textlevel'} = $level_map{'document'};
    884     }
     845    # text level (and gdbm level) is always section
     846    $build_cfg->{'textlevel'} = $level_map{'section'};
    885847   
    886848}
Note: See TracChangeset for help on using the changeset viewer.