Changeset 9669 for trunk/gsdl
- Timestamp: 2005-04-14T10:44:59+12:00 (19 years ago)
- Location: trunk/gsdl/perllib
- Files: 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/lucenebuilder.pm
r9548 r9669
231 231    # to a subcollection
232 232    my $indexexparr = [];
    233  + my $langarr = [];
233 234
234 235    # there may be subcollection info, and language info.
  …   …
247 248    # ones we want in the index
248 249
249      - # this puts a separate Language/en entry in for each language in the list
250      - # is this what we want?
251      - # should we just have one entry with Language/en,es/ ??
252 250    my @languages = ();
253 251    my $language_metadata = "Language";
  …   …
262 260    }
263 261    if($not) {
264      - push (@$indexexparr, "!$language_metadata/$language/");
    262  + push (@$langarr, "!$language");
265 263    } else {
266      - push (@$indexexparr, "$language_metadata/$language/");
    264  + push (@$langarr, "$language");
267 265    }
268 266    }
  …   …
307 305    $self->{'buildproc'}->set_mode ('text');
308 306    $self->{'buildproc'}->set_index ($index, $indexexparr);
    307  + $self->{'buildproc'}->set_index_languages ($language_metadata, $langarr) if (defined $language);
309 308    $self->{'buildproc'}->set_indexing_text (1);
310 309    $self->{'buildproc'}->set_store_text(1);
-
trunk/gsdl/perllib/lucenebuildproc.pm
r9214 r9669
128 128    }
129 129    }
    130  + }
    131  +
    132  + # if this doc is so far in the sub collection, and we have lang info,
    133  + # now we check the languages to see if it matches
    134  + if($indexed_doc && defined $self->{'lang_meta'}) {
    135  + $indexed_doc = 0;
    136  + my $field = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'lang_meta'});
    137  + if (defined $field) {
    138  + foreach my $lang (@{$self->{'langarr'}}) {
    139  + my ($bool) = $lang =~ /^(.)/;
    140  + if ($bool eq '!') {
    141  + $lang =~ s/^.//;
    142  + if ($field !~ /$lang/) {
    143  + $indexed_doc = 1; last;
    144  + }
    145  + } else {
    146  + if ($field =~ /$lang/) {
    147  + $indexed_doc = 1; last;
    148  + }
    149  + }
    150  + }
    151  + }
130 152    }
131 153
-
trunk/gsdl/perllib/mgbuilder.pm
r9548 r9669
545 545    # to a subcollection
546 546    my $indexexparr = [];
547      -
    547  + my $langarr = [];
548 548    # there may be subcollection info, and language info.
549 549    my ($level, $fields, $subcollection, $language) = split (":", $index);
  …   …
560 560    # a language subcollection - only put languages expressions for the
561 561    # ones we want in the index
562      - # this puts a separate Language/en entry in for each language in the list
563      - # is this what we want?
564      - # should we just have one entry with Language/en,es/ ??
565 562
566 563    my @languages = ();
  …   …
576 573    }
577 574    if($not) {
578      - push (@$indexexparr, "!$language_metadata/$language/");
    575  + push (@$langarr, "!$language");
579 576    } else {
580      - push (@$indexexparr, "$language_metadata/$language/");
581      - }
582      - }
583      -
    577  + push (@$langarr, "$language");
    578  + }
    579  + }
    580  +
584 581    # Build index dictionary. Uses verbatim stem method
585 582    print $outhandle "\n creating index dictionary\n" if ($self->{'verbosity'} >= 1);
  …   …
602 599    $self->{'buildproc'}->set_mode ('text');
603 600    $self->{'buildproc'}->set_index ($index, $indexexparr);
    601  + $self->{'buildproc'}->set_index_languages ($language_metadata, $langarr) if (defined $language);
604 602    $self->{'buildproc'}->set_indexing_text (1);
605 603    $self->{'buildproc'}->set_store_text(1);
-
trunk/gsdl/perllib/mgbuildproc.pm
r8716 r9669
151 151    }
152 152
    153  + sub set_index_languages {
    154  + my $self = shift (@_);
    155  + my ($lang_meta, $langarr) = @_;
    156  + $self->{'lang_meta'} = $lang_meta;
    157  + $self->{'langarr'} = $langarr;
    158  + }
    159  +
153 160    sub get_index {
154 161    my $self = shift (@_);
  …   …
467 474    }
468 475    }
    476  + }
    477  + # if this doc is so far in the sub collection, and we have lang info,
    478  + # now we check the languages to see if it matches
    479  + if($indexed_doc && defined $self->{'lang_meta'}) {
    480  + $indexed_doc = 0;
    481  + my $field = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'lang_meta'});
    482  + if (defined $field) {
    483  + foreach my $lang (@{$self->{'langarr'}}) {
    484  + my ($bool) = $lang =~ /^(.)/;
    485  + if ($bool eq '!') {
    486  + $lang =~ s/^.//;
    487  + if ($field !~ /$lang/) {
    488  + $indexed_doc = 1; last;
    489  + }
    490  + } else {
    491  + if ($field =~ /$lang/) {
    492  + $indexed_doc = 1; last;
    493  + }
    494  + }
    495  + }
    496  + }
469 497    }
470 498
-
trunk/gsdl/perllib/mgppbuilder.pm
r9548 r9669
645 645    # to a subcollection
646 646    my $indexexparr = [];
647      -
    647  + my $langarr = [];
648 648    # there may be subcollection info, and language info.
649 649    my ($fields, $subcollection, $language) = split (":", $index);
  …   …
661 661    # ones we want in the index
662 662
663      - # this puts a separate Language/en entry in for each language in the list
664      - # is this what we want?
665      - # should we just have one entry with Language/en,es/ ??
666 663    my @languages = ();
667 664    my $language_metadata = "Language";
  …   …
676 673    }
677 674    if($not) {
678      - push (@$indexexparr, "!$language_metadata/$language/");
    675  + push (@$langarr, "!$language");
679 676    } else {
680      - push (@$indexexparr, "$language_metadata/$language/");
    677  + push (@$langarr, "$language");
681 678    }
682 679    }
  …   …
701 698    $self->{'buildproc'}->set_mode ('text');
702 699    $self->{'buildproc'}->set_index ($index, $indexexparr);
    700  + $self->{'buildproc'}->set_index_languages ($language_metadata, $langarr) if (defined $language);
703 701    $self->{'buildproc'}->set_indexing_text (1);
704 702    $self->{'buildproc'}->set_store_text(1);
-
trunk/gsdl/perllib/mgppbuildproc.pm
r9157 r9669
163 163    }
164 164
    165  + sub set_index_languages {
    166  + my $self = shift (@_);
    167  + my ($lang_meta, $langarr) = @_;
    168  + $self->{'lang_meta'} = $lang_meta;
    169  + $self->{'langarr'} = $langarr;
    170  + }
    171  +
165 172    sub get_index {
166 173    my $self = shift (@_);
  …   …
487 494    my $tag = $1;
488 495    $outtext .= $`." "; #add everything before the matched tag
489      - $text = $'; # everything after the matched tag
    496  + $text = $'; #'everything after the matched tag
490 497    if ($para && $tag =~ /^\s*p\s/i) {
491 498    $outtext .= $para;
  …   …
494 501    $text =~ /<\/pre>/; # find the closing pre tag
495 502    my $tmp_text = $`; #everything before the closing pre tag
496      - $text = $'; # everything after the </pre>
    503  + $text = $'; #'everything after the </pre>
497 504    $tmp_text =~ s/[<>]//g; # remove all < and >
498 505    $outtext.= $tmp_text . " ";
  …   …
560 567    }
561 568
    569  + # if this doc is so far in the sub collection, and we have lang info,
    570  + # now we check the languages to see if it matches
    571  + if($indexed_doc && defined $self->{'lang_meta'}) {
    572  + $indexed_doc = 0;
    573  + my $field = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'lang_meta'});
    574  + if (defined $field) {
    575  + foreach my $lang (@{$self->{'langarr'}}) {
    576  + my ($bool) = $lang =~ /^(.)/;
    577  + if ($bool eq '!') {
    578  + $lang =~ s/^.//;
    579  + if ($field !~ /$lang/) {
    580  + $indexed_doc = 1; last;
    581  + }
    582  + } else {
    583  + if ($field =~ /$lang/) {
    584  + $indexed_doc = 1; last;
    585  + }
    586  + }
    587  + }
    588  + }
    589  + }
    590  +
562 591    # this is another document
563 592    $self->{'num_docs'} += 1;
  …   …
578 607    }
579 608    my ($paratag) = "";
    609  +
580 610    if ($self->{'levels'}->{'paragraph'}) {
581 611    if ($self->{'strip_html'}) {
Note: See TracChangeset for help on using the changeset viewer.