Changeset 17564 for gsdl/trunk/perllib/mgppbuildproc.pm
- Timestamp:
- 2008-10-20T15:33:25+13:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/mgppbuildproc.pm
r17117 r17564 46 46 'paragraph'=>'Para'); 47 47 48 # change this so a user can add their own ones in via a file or cfg 49 #add AND, OR, NOT NEAR to this list - these cannot be used as field names 50 #also add the level names (Doc, Sec, Para) 51 our %static_indexfield_map = ('Title'=>'TI', 52 'TI'=>1, 53 'Subject'=>'SU', 54 'SU'=>1, 55 'Creator'=>'CR', 56 'CR'=>1, 57 'Organization'=>'ORG', 58 'ORG'=>1, 59 'Source'=>'SO', 60 'SO'=>1, 61 'Howto'=>'HT', 62 'HT'=>1, 63 'ItemTitle'=>'IT', 64 'IT'=>1, 65 'ProgNumber'=>'PN', 66 'PN'=>1, 67 'People'=>'PE', 68 'PE'=>1, 69 'Coverage'=>'CO', 70 'CO'=>1, 71 'allfields'=>'ZZ', 72 'ZZ'=>1, 73 'text'=>'TX', 74 'TX'=>1, 75 'AND'=>1, 76 'OR'=>1, 77 'NOT'=>1, 78 'NEAR'=>1, 79 'Doc'=>1, 80 'Sec'=>1, 81 'Para'=>1); 82 83 48 84 sub new { 49 85 my $class = shift @_; … … 62 98 63 99 64 sub set_indexfieldmap { 65 my $self = shift (@_); 66 my ($indexmap) = @_; 67 68 $self->{'indexfieldmap'} = $indexmap; 69 } 100 #sub set_indexfieldmap { 101 # my $self = shift (@_); 102 # my ($indexmap) = @_; 103 104 # $self->{'default_index_field_mapping'} = $indexmap; 105 #$self->{'indexfieldmap'} = $indexmap; 106 #} 70 107 71 108 sub get_indexfieldmap { … … 243 280 244 281 # metadata - output all metadata we know about except gsdl stuff 282 # each metadata is in a separate index field 245 283 elsif ($real_field eq "metadata") { 246 284 my $shortname = ""; … … 263 301 $self->{'indexfieldmap'}->{$shortname} = 1; 264 302 } 303 # should this line only be done if the following test is true? 265 304 $new_text .= "$paratag<$shortname>$mvalue</$shortname>\n"; 266 305 if (!defined $self->{'indexfields'}->{$mfield}) { … … 274 313 # a comma separated list 275 314 my $shortname=""; 315 my $new_field = 0; # have we found a new field name? 316 276 317 if (defined $self->{'indexfieldmap'}->{$real_field}) { 277 318 $shortname = $self->{'indexfieldmap'}->{$real_field}; … … 279 320 else { 280 321 $shortname = $self->create_shortname($real_field); 281 $self->{'indexfieldmap'}->{$real_field} = $shortname; 282 $self->{'indexfieldmap'}->{$shortname} = 1; 322 $new_field = 1; # we want to record this shortname, but only if we have actually found some metadata values 283 323 } 284 # we only want one tag around the index 285 $new_text .= "$paratag<$shortname>"; 286 my @metadata_list = (); 324 my @metadata_list = (); # put any meta values in here 325 my $section_text = ""; # put any text in here 287 326 foreach my $submeta (split /,/, $real_field) { 288 if ($submeta eq "text") { 289 my $section_text = $doc_obj->get_text($section); 290 if ($self->{'indexing_text'}) { 291 if ($paratag ne "") { 292 # we fiddle around with splitting text into paragraphs 293 $new_text .= "</$shortname>$paratag<$shortname>\n"; 294 $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "</$shortname>$paratag<$shortname>"); 327 if ($submeta eq "text") { 328 # no point in indexing text more than once 329 if ($section_text eq "") { 330 $section_text = $doc_obj->get_text($section); 331 if ($self->{'indexing_text'}) { 332 if ($paratag ne "") { 333 # we fiddle around with splitting text into paragraphs 334 $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "</$shortname>$paratag<$shortname>"); 335 } 336 else { 337 $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, ""); 338 } 295 339 } 296 else {297 $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "");298 }299 $new_text .= "$section_text</$shortname><$shortname>\n";300 340 } 301 else {302 # leave html stuff in, and don't add Paragraph tags - never retrieve paras at the moment303 $new_text .= $section_text;304 }305 341 } 306 342 else { 343 # its a metadata element 307 344 my @section_metadata = @{$doc_obj->get_metadata ($section, $submeta)}; 308 345 if ($section ne $doc_obj->get_top_section() && $self->{'indexing_text'} && defined ($self->{'sections_index_document_metadata'})) { … … 313 350 push (@metadata_list, @section_metadata); 314 351 } 352 } # for each field in index 353 354 355 # now we add the text and/or the metadata into new_text 356 if ($section_text ne "" || scalar(@metadata_list)) { 357 $new_text .= "$paratag<$shortname>"; 358 359 if ($section_text ne "") { 360 $new_text .= "$section_text "; 361 if ($paratag ne "" && scalar(@metadata_list)) { 362 $new_text .= "</$shortname>$paratag<$shortname>"; 363 } 364 } 365 foreach my $item (@metadata_list) { 366 $new_text .= "$item "; 367 } 368 $new_text .= "</$shortname>"; 369 370 if ($new_field) { 371 # we need to add to the list in indexfields 372 373 $self->{'indexfieldmap'}->{$real_field} = $shortname; 374 $self->{'indexfieldmap'}->{$shortname} = 1; 375 } 315 376 } 316 foreach my $item (@metadata_list) {317 #$new_text .= "$paratag<$shortname>$item</$shortname>\n";318 $new_text .= "$item ";319 }320 $new_text .= "</$shortname>";321 377 } 322 378 323 379 # filter the text 324 380 $new_text = $self->filter_text ($field, $new_text); … … 341 397 342 398 my ($realname) = @_; 343 #take the first two chars 399 # try our predefined static mapping 400 if (defined $static_indexfield_map{$realname}) { 401 return $static_indexfield_map{$realname}; 402 } 403 #try the first two chars 344 404 my $shortname; 345 405 if ($realname =~ /^[^\w]*(\w)[^\w]*(\w)/) { … … 354 414 #if already used, take the first and third letdigs and so on 355 415 my $count = 1; 356 while (defined $self->{'indexfieldmap'}->{$shortname} ) {416 while (defined $self->{'indexfieldmap'}->{$shortname} || defined $static_indexfield_map{$shortname}) { 357 417 if ($realname =~ /^[^\w]*(\w)([^\w]*\w){$count}[^\w]*(\w)/) { 358 418 $shortname = "$1$3";
Note:
See TracChangeset
for help on using the changeset viewer.