Changeset 9669


Ignore:
Timestamp:
2005-04-14T10:44:59+12:00 (19 years ago)
Author:
kjdon
Message:

Fixed up the case where an index has both subcollection partitions and language partitions: the code was doing a single OR across both sets of expressions, but what is really needed is an OR over the subcollection partitions ANDed with an OR over the languages.

Location:
trunk/gsdl/perllib
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/lucenebuilder.pm

    r9548 r9669  
    231231    # to a subcollection
    232232    my $indexexparr = [];
     233    my $langarr = [];
    233234
    234235    # there may be subcollection info, and language info.
     
    247248    # ones we want in the index
    248249   
    249     # this puts a separate Language/en entry in for each language in the list
    250     # is this what we want?
    251     # should we just have one entry with Language/en,es/ ??
    252250    my @languages = ();
    253251    my $language_metadata = "Language";
     
    262260    }
    263261    if($not) {
    264         push (@$indexexparr, "!$language_metadata/$language/");
     262        push (@$langarr, "!$language");
    265263    } else {
    266         push (@$indexexparr, "$language_metadata/$language/");
     264        push (@$langarr, "$language");
    267265    }
    268266    }
     
    307305    $self->{'buildproc'}->set_mode ('text');
    308306    $self->{'buildproc'}->set_index ($index, $indexexparr);
     307    $self->{'buildproc'}->set_index_languages ($language_metadata, $langarr) if (defined $language);
    309308    $self->{'buildproc'}->set_indexing_text (1);
    310309    $self->{'buildproc'}->set_store_text(1);
  • trunk/gsdl/perllib/lucenebuildproc.pm

    r9214 r9669  
    128128        }
    129129    }
     130    }
     131
     132    # if this doc is so far in the sub collection, and we have lang info,
     133    # now we check the languages to see if it matches
     134    if($indexed_doc && defined $self->{'lang_meta'}) {
     135    $indexed_doc = 0;
     136    my $field = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'lang_meta'});
     137    if (defined $field) {
     138        foreach my $lang (@{$self->{'langarr'}}) {
     139        my ($bool) = $lang =~ /^(.)/;
     140        if ($bool eq '!') {
     141            $lang =~ s/^.//;
     142            if ($field !~ /$lang/) {
     143            $indexed_doc = 1; last;
     144            }
     145        } else {
     146            if ($field =~ /$lang/) {
     147            $indexed_doc = 1; last;
     148            }
     149        }
     150        }
     151    }
    130152    }
    131153
  • trunk/gsdl/perllib/mgbuilder.pm

    r9548 r9669  
    545545    # to a subcollection
    546546    my $indexexparr = [];
    547 
     547    my $langarr = [];
    548548    # there may be subcollection info, and language info.
    549549    my ($level, $fields, $subcollection, $language) = split (":", $index);
     
    560560    # a language subcollection - only put languages expressions for the
    561561    # ones we want in the index
    562     # this puts a separate Language/en entry in for each language in the list
    563     # is this what we want?
    564     # should we just have one entry with Language/en,es/ ??
    565562
    566563    my @languages = ();
     
    576573    }
    577574    if($not) {
    578         push (@$indexexparr, "!$language_metadata/$language/");
     575        push (@$langarr, "!$language");
    579576    } else {
    580         push (@$indexexparr, "$language_metadata/$language/");
    581     }
    582     }
    583 
     577        push (@$langarr, "$language");
     578    }
     579    }
     580   
    584581    # Build index dictionary. Uses verbatim stem method
    585582    print $outhandle "\n    creating index dictionary\n"  if ($self->{'verbosity'} >= 1);
     
    602599    $self->{'buildproc'}->set_mode ('text');
    603600    $self->{'buildproc'}->set_index ($index, $indexexparr);
     601    $self->{'buildproc'}->set_index_languages ($language_metadata, $langarr) if (defined $language);
    604602    $self->{'buildproc'}->set_indexing_text (1);
    605603    $self->{'buildproc'}->set_store_text(1);
  • trunk/gsdl/perllib/mgbuildproc.pm

    r8716 r9669  
    151151}
    152152
     153sub set_index_languages {
     154    my $self = shift (@_);
     155    my ($lang_meta, $langarr) = @_;
     156    $self->{'lang_meta'} = $lang_meta;
     157    $self->{'langarr'} = $langarr;
     158}
     159
    153160sub get_index {
    154161    my $self = shift (@_);
     
    467474        }
    468475    }
     476    }
     477    # if this doc is so far in the sub collection, and we have lang info,
     478    # now we check the languages to see if it matches
     479    if($indexed_doc && defined $self->{'lang_meta'}) {
     480    $indexed_doc = 0;
     481    my $field = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'lang_meta'});
     482    if (defined $field) {
     483        foreach my $lang (@{$self->{'langarr'}}) {
     484        my ($bool) = $lang =~ /^(.)/;
     485        if ($bool eq '!') {
     486            $lang =~ s/^.//;
     487            if ($field !~ /$lang/) {
     488            $indexed_doc = 1; last;
     489            }
     490        } else {
     491            if ($field =~ /$lang/) {
     492            $indexed_doc = 1; last;
     493            }
     494        }
     495        }
     496    }
    469497    }
    470498
  • trunk/gsdl/perllib/mgppbuilder.pm

    r9548 r9669  
    645645    # to a subcollection
    646646    my $indexexparr = [];
    647 
     647    my $langarr = [];
    648648    # there may be subcollection info, and language info.
    649649    my ($fields, $subcollection, $language) = split (":", $index);
     
    661661    # ones we want in the index
    662662   
    663     # this puts a separate Language/en entry in for each language in the list
    664     # is this what we want?
    665     # should we just have one entry with Language/en,es/ ??
    666663    my @languages = ();
    667664    my $language_metadata = "Language";
     
    676673    }
    677674    if($not) {
    678         push (@$indexexparr, "!$language_metadata/$language/");
     675        push (@$langarr, "!$language");
    679676    } else {
    680         push (@$indexexparr, "$language_metadata/$language/");
     677        push (@$langarr, "$language");
    681678    }
    682679    }
     
    701698    $self->{'buildproc'}->set_mode ('text');
    702699    $self->{'buildproc'}->set_index ($index, $indexexparr);
     700    $self->{'buildproc'}->set_index_languages ($language_metadata, $langarr) if (defined $language);
    703701    $self->{'buildproc'}->set_indexing_text (1);
    704702    $self->{'buildproc'}->set_store_text(1);
  • trunk/gsdl/perllib/mgppbuildproc.pm

    r9157 r9669  
    163163}
    164164
     165sub set_index_languages {
     166    my $self = shift (@_);
     167    my ($lang_meta, $langarr) = @_;
     168    $self->{'lang_meta'} = $lang_meta;
     169    $self->{'langarr'} = $langarr;
     170}
     171
    165172sub get_index {
    166173    my $self = shift (@_);
     
    487494        my $tag = $1;
    488495        $outtext .= $`." "; #add everything before the matched tag
    489         $text = $'; #everything after the matched tag
     496        $text = $'; #'everything after the matched tag
    490497        if ($para && $tag =~ /^\s*p\s/i) {
    491498        $outtext .= $para;
     
    494501        $text =~ /<\/pre>/; # find the closing pre tag
    495502        my $tmp_text = $`; #everything before the closing pre tag
    496         $text = $'; #everything after the </pre>
     503        $text = $'; #'everything after the </pre>
    497504        $tmp_text =~ s/[<>]//g; # remove all < and >
    498505        $outtext.= $tmp_text . " ";
     
    560567    }
    561568
     569    # if this doc is so far in the sub collection, and we have lang info,
     570    # now we check the languages to see if it matches
     571    if($indexed_doc && defined $self->{'lang_meta'}) {
     572    $indexed_doc = 0;
     573    my $field = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'lang_meta'});
     574    if (defined $field) {
     575        foreach my $lang (@{$self->{'langarr'}}) {
     576        my ($bool) = $lang =~ /^(.)/;
     577        if ($bool eq '!') {
     578            $lang =~ s/^.//;
     579            if ($field !~ /$lang/) {
     580            $indexed_doc = 1; last;
     581            }
     582        } else {
     583            if ($field =~ /$lang/) {
     584            $indexed_doc = 1; last;
     585            }
     586        }
     587        }
     588    }
     589    }
     590
    562591    # this is another document
    563592    $self->{'num_docs'} += 1;
     
    578607    }
    579608    my ($paratag) = "";
     609   
    580610    if ($self->{'levels'}->{'paragraph'}) {
    581611    if ($self->{'strip_html'}) {
Note: See TracChangeset for help on using the changeset viewer.