Changeset 27328 for main/trunk/greenstone2/perllib/mgppbuildproc.pm
- Timestamp:
- 2013-05-14T11:09:58+12:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/mgppbuildproc.pm
r24404 r27328 90 90 91 91 $self->{'dontindex'} = {}; 92 $self->{'indexfieldmap'} = {}; 93 $self->{'indexfields'} = {}; # only put in the ones that are not specified directly in the index 92 $self->{'allindexfields'} = {}; # list of all actually indexed fields 93 $self->{'extraindexfields'} = {}; # indexed fields not specified in original index list - ie if 'metadata' was specified. 94 $self->{'fieldnamemap'} = {'allfields'=>'ZZ', 95 'ZZ'=>1, 96 'text'=>'TX', 97 'TX'=>1}; # mapping between index full names and short names. Once we have decided on a mapping it goes in here, whether we have indexed something or not. 94 98 $self->{'strip_html'}=1; 95 99 96 100 return bless $self, $class; 97 }98 99 100 #sub set_indexfieldmap {101 # my $self = shift (@_);102 # my ($indexmap) = @_;103 104 # $self->{'default_index_field_mapping'} = $indexmap;105 #$self->{'indexfieldmap'} = $indexmap;106 #}107 108 sub get_indexfieldmap {109 my $self = shift (@_);110 111 return $self->{'indexfieldmap'};112 101 } 113 102 … … 279 268 my $new_text = ""; 280 269 281 # we get allfields by default - do nothing except add into the map 282 if ($real_field eq "allfields") { 283 $self->{'indexfieldmap'}->{"allfields"} = "ZZ"; 284 $self->{'indexfieldmap'}->{"ZZ"} = 1; 285 } 270 # we get allfields by default 271 next if ($real_field eq "allfields"); 286 272 287 273 # metadata - output all metadata we know about except gsdl stuff … … 290 276 # we will process this later, so we are not reindexing metadata already indexed 291 277 $all_metadata_specified = 1; 278 next; 292 279 } 293 280 294 else {295 296 281 #individual metadata and or text specified - could be 297 282 # a comma separated list 298 283 $specified_fields->{$real_field} = 1; 299 284 my $shortname=""; 300 my $new_field = 0; # have we found a new field name? 
301 302 if (defined $self->{'indexfieldmap'}->{$real_field}) {303 $shortname = $self->{'indexfieldmap'}->{$real_field};304 }305 else {306 $shortname = $self->create_shortname($real_field);307 $new_field = 1; # we want to record this shortname, but only if we have actually found some metadata values308 } 285 286 if (defined $self->{'fieldnamemap'}->{$real_field}) { 287 $shortname = $self->{'fieldnamemap'}->{$real_field}; 288 } else { 289 $shortname = $self->create_shortname($real_field); 290 $self->{'fieldnamemap'}->{$real_field} = $shortname; 291 $self->{'fieldnamemap'}->{$shortname} = 1; 292 } 293 309 294 my @metadata_list = (); # put any meta values in here 310 295 my $section_text = ""; # put any text in here … … 357 342 # only add tags in if indexing 358 343 $new_text .= "</$shortname>"; 359 } 360 if ($self->{'indexing_text'} && $new_field) { 361 # we need to add to the list in indexfields 362 363 $self->{'indexfieldmap'}->{$real_field} = $shortname; 364 $self->{'indexfieldmap'}->{$shortname} = 1; 344 $self->{'allindexfields'}->{$real_field} = 1; 365 345 } 366 346 } 367 }368 347 369 348 # filter the text … … 388 367 next if ($mfield =~ /^gsdl/); 389 368 390 391 if (defined $self->{'indexfieldmap'}->{$mfield}) { 392 $shortname = $self->{'indexfieldmap'}->{$mfield}; 369 if (defined $self->{'fieldnamemap'}->{$mfield}) { 370 $shortname = $self->{'fieldnamemap'}->{$mfield}; 371 } else { 372 $shortname = $self->create_shortname($mfield); 373 $self->{'fieldnamemap'}->{$mfield} = $shortname; 374 $self->{'fieldnamemap'}->{$shortname} = 1; 393 375 } 394 else { 395 $shortname = $self->create_shortname($mfield); 396 $self->{'indexfieldmap'}->{$mfield} = $shortname; 397 $self->{'indexfieldmap'}->{$shortname} = 1; 398 } 376 $self->{'allindexfields'}->{$mfield} = 1; 399 377 $new_text .= "$paratag<$shortname>$mvalue</$shortname>\n"; 400 if (!defined $self->{' indexfields'}->{$mfield}) {401 $self->{' indexfields'}->{$mfield} = 1;378 if (!defined 
$self->{'extraindexfields'}->{$mfield}) { 379 $self->{'extraindexfields'}->{$mfield} = 1; 402 380 } 403 381 … … 426 404 427 405 my ($realname) = @_; 406 my @realnamelist = split(",", $realname); 407 map {$_=~ s/^[a-zA-Z]+\.//;} @realnamelist; #remove namespaces 408 my ($singlename) = $realnamelist[0]; 409 428 410 # try our predefined static mapping 429 if (defined $static_indexfield_map{$realname}) { 430 return $static_indexfield_map{$realname}; 431 } 411 my $name; 412 if (defined ($name = $static_indexfield_map{$singlename})) { 413 if (! defined $self->{'fieldnamemap'}->{$name}) { 414 # has this shortname already been used?? 415 return $static_indexfield_map{$singlename}; 416 } 417 } 418 # we can't use the quick map, so join all fields back together (without namespaces), and try sets of two characters. 419 $realname = join ("", @realnamelist); 432 420 #try the first two chars 433 421 my $shortname; … … 443 431 #if already used, take the first and third letdigs and so on 444 432 my $count = 1; 445 while (defined $self->{' indexfieldmap'}->{$shortname} || defined $static_indexfield_map{$shortname}) {433 while (defined $self->{'fieldnamemap'}->{$shortname} || defined $static_indexfield_map{$shortname}) { 446 434 if ($realname =~ /^[^\w]*(\w)([^\w]*\w){$count}[^\w]*(\w)/) { 447 435 $shortname = "$1$3";
Note: See TracChangeset for help on using the changeset viewer.