- Timestamp: 2013-05-16T15:30:15+12:00 (11 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/lucenebuildproc.pm
r27329 r27358 49 49 50 50 $self->{'numincdocs'} = 0; 51 51 $self->{'specified_fields'} = (); # list of fields actually specified in the index, in a map 52 $self->{'allfields_index'} = 0; # do we need allfields index? 53 $self->{'all_metadata_specified'} = 0; # are we indexing all metadata? 54 $self->{'actualsortfields'} = {}; # sort fields that have actually been used 55 $self->{'sortfieldnamemap'} = {}; # mapping between field name and field shortname, eg dc.Title->byTI 52 56 return bless $self, $class; 53 57 } 54 58 59 sub set_index { 60 my $self = shift (@_); 61 my ($index, $indexexparr) = @_; 62 63 $self->mgppbuildproc::set_index($index, $indexexparr); 64 65 # just get the list of index fields without any subcoll stuff 66 my ($fields) = split (/:/, $self->{'index'}); 67 68 foreach my $field (split (/;/, $fields)) { 69 if ($field eq "allfields") { 70 $self->{'allfields_index'} = 1; 71 } elsif ($field eq "metadata") { 72 $self->{'all_metadata_specified'} = 1; 73 } else { 74 $field =~ s/^top//; 75 $self->{'specified_fields'} ->{$field} = 1; 76 } 77 } 78 } 79 80 sub set_sortfields { 81 my $self = shift (@_); 82 83 ($self->{'sortfields'}) = @_; 84 } 55 85 56 86 sub is_incremental_capable … … 76 106 return if (!$self->get_indexing_text() && ($edit_mode eq "delete")); 77 107 108 # 0/1 to indicate whether this doc is part of the specified subcollection 78 109 my $indexed_doc = $self->is_subcollection_doc($doc_obj); 79 110 … … 85 116 $self->{'num_docs'} -= 1; 86 117 } 118 87 119 88 120 # get the parameters for the output … … 152 184 153 185 154 # has the user added a 'metadata' index? 155 my $all_metadata_specified = 0; 156 # which fields have already been indexed? (same as fields, but in a map) 157 my $specified_fields = {}; 158 159 # do we have an allfields index?? 
160 my $allfields_index = 0; 161 # collect up all the text for it in here 186 # collect up all the text for allfields index in here (if there is one) 162 187 my $allfields_text = ""; 163 foreach my $field (split (/;/, $fields)) { 164 if ($field eq "allfields") { 165 $allfields_index = 1; 166 } elsif ($field eq "metadata") { 167 $all_metadata_specified = 1; 168 } 169 } 170 188 171 189 foreach my $field (split (/;/, $fields)) { 172 190 … … 180 198 181 199 #individual metadata and or text specified - could be a comma separated list 182 $specified_fields->{$real_field} = 1;200 #$specified_fields->{$real_field} = 1; 183 201 my $shortname=""; 184 202 my $new_field = 0; # have we found a new field name? … … 235 253 } 236 254 237 if ($ allfields_index) {255 if ($self->{'allfields_index'}) { 238 256 $allfields_text .= $new_text; 239 257 } … … 259 277 } # foreach field 260 278 261 if ($ all_metadata_specified) {279 if ($self->{'all_metadata_specified'}) { 262 280 263 281 my $new_text = ""; … … 269 287 next unless defined $mvalue && $mvalue ne ""; 270 288 # we have already indexed this 271 next if defined ($s pecified_fields->{$mfield});289 next if defined ($self->{'specified_fields'}->{$mfield}); 272 290 # check fields here, maybe others dont want - change to use dontindex!! 
273 291 next if ($mfield eq "Identifier" || $mfield eq "classifytype" || $mfield eq "assocfilepath"); … … 286 304 $self->{'allindexfields'}->{$mfield} = 1; 287 305 $new_text .= "<$shortname index=\"1\">$mvalue</$shortname>\n"; 288 if ($ allfields_index) {306 if ($self->{'allfields_index'}) { 289 307 $allfields_text .= "$mvalue "; 290 308 } … … 308 326 } 309 327 310 if ($ allfields_index) {328 if ($self->{'allfields_index'}) { 311 329 312 330 my $new_text = "<ZZ index=\"1\">$allfields_text</ZZ>\n"; … … 323 341 } 324 342 } 325 343 # only add sort fields for this section if we are indexing this section, we are doing section level indexing or this is the top section 344 if ($self->{'indexing_text'} && ($sec_tag_name ne "" || $doc_section == 1 )) { 345 # add sort fields if there are any 346 347 foreach my $sfield (@{$self->{'sortfields'}}) { 348 my $sf_shortname; 349 if (defined $self->{'sortfieldnamemap'}->{$sfield}) { 350 $sf_shortname = $self->{'sortfieldnamemap'}->{$sfield}; 351 } 352 else { 353 $sf_shortname = $self->create_sortfield_shortname($sfield); 354 $self->{'sortfieldnamemap'}->{$sfield} = $sf_shortname; 355 $self->{'sortfieldnamemap'}->{$sf_shortname} = 1; 356 } 357 my @metadata_list = (); # put any metadata values in here 358 foreach my $submeta (split /,/, $sfield) { 359 $submeta =~ s/^ex\.([^.]+)$/$1/; #strip off ex. iff it's the only metadata set prefix (will leave ex.dc.* intact) 360 361 my @section_metadata = @{$doc_obj->get_metadata ($section, $submeta)}; 362 push (@metadata_list, @section_metadata); 363 } 364 my $new_text = ""; 365 foreach my $item (@metadata_list) { 366 &ghtml::htmlsafe($item); 367 $new_text .= "$item "; 368 } 369 if ($new_text =~ /\S/) { 370 $new_text = "<$sf_shortname index=\"1\" tokenize=\"0\">$new_text</$sf_shortname>"; 371 # filter the text??? 
372 $text .= "$new_text"; # add it to the main text block 373 $self->{'actualsortfields'}->{$sfield} = 1; 374 } 375 } 376 } 326 377 $text .= "\n</$sec_tag_name>\n" if ($sec_tag_name ne ""); 327 378 328 379 $section = $doc_obj->get_next_section($section); 329 } # while defined section 380 } # for each section 381 382 #open (TEXTOUT, ">text.out"); 383 #print TEXTOUT "$text\n$documentendtag"; 384 #close TEXTOUT; 330 385 331 386 print $lucenehandle "$text\n$documentendtag"; … … 556 611 } 557 612 } 613 614 sub create_sortfield_shortname { 615 my $self = shift(@_); 616 617 my ($realname) = @_; 618 619 my $index_shortname; 620 # if we have created a shortname for an index on this field, then use it. 621 if (defined $self->{'fieldnamemap'}->{$realname}) { 622 $index_shortname = $self->{'fieldnamemap'}->{$realname}; 623 } else { 624 $index_shortname = $self->create_shortname($realname); 625 } 626 return "by".$index_shortname; 627 } 628 629 558 630 1; 559 631
Note: See TracChangeset for help on using the changeset viewer.