Changeset 17567


Ignore:
Timestamp:
2008-10-22T13:26:24+13:00 (16 years ago)
Author:
kjdon
Message:

If the 'metadata' index is specified, only add the metadata fields that have not already been indexed as individually specified fields; there is no point in indexing them twice.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/mgppbuildproc.pm

    r17564 r17567  
    266266   
    267267    $self->{'num_bytes'} += $doc_obj->get_text_length ($section);
     268
     269    # has the user added a 'metadata' index?
     270    my $all_metadata_specified = 0;
     271    # which fields have already been indexed? (same as fields, but in a map)
     272    my $specified_fields = {};
    268273    foreach my $field (split (/;/, $fields)) {
    269274        # only deal with this field if it doesn't start with top or
     
    274279        my $new_text = "";
    275280
    276         # we get allfields by default - do nothing
    277         if ($real_field eq "allfields") {
    278    
     281        # we get allfields by default - do nothing except add into the map
     282        if ($real_field eq "allfields") {
     283        $self->{'indexfieldmap'}->{"allfields"} = "ZZ";
     284        $self->{'indexfieldmap'}->{"ZZ"} = 1;
    279285        }
    280286       
    281287        # metadata - output all metadata we know about except gsdl stuff
    282288        # each metadata is in a separate index field
    283         elsif ($real_field eq "metadata") {
    284         my $shortname = "";
    285         my $metadata = $doc_obj->get_all_metadata ($section);
    286         foreach my $pair (@$metadata) {
    287             my ($mfield, $mvalue) = (@$pair);
    288             # check fields here, maybe others dont want - change to use dontindex!!
    289             if ($mfield ne "Identifier"
    290             && $mfield !~ /^gsdl/
    291             && $mfield ne "classifytype"
    292             && $mfield ne "assocfilepath"
    293             && defined $mvalue && $mvalue ne "") {
    294            
    295             if (defined $self->{'indexfieldmap'}->{$mfield}) {
    296                 $shortname = $self->{'indexfieldmap'}->{$mfield};
    297             }
    298             else {
    299                 $shortname = $self->create_shortname($mfield);
    300                 $self->{'indexfieldmap'}->{$mfield} = $shortname;
    301                 $self->{'indexfieldmap'}->{$shortname} = 1;
    302             }     
    303             # should this line only be done if the following test is true?
    304             $new_text .= "$paratag<$shortname>$mvalue</$shortname>\n";
    305             if (!defined $self->{'indexfields'}->{$mfield}) {
    306                 $self->{'indexfields'}->{$mfield} = 1;
    307             }                   
    308             }
    309         }
    310         }
     289        if ($real_field eq "metadata") {
     290        # we will process this later, so we are not reindexing metadata already indexed
     291        $all_metadata_specified = 1;
     292        }
     293       
    311294        else {
     295       
    312296        #individual metadata and or text specified - could be
    313297        # a comma separated list
     298        $specified_fields->{$real_field} = 1;
    314299        my $shortname="";
    315300        my $new_field = 0; # have we found a new field name?
     
    383368        $text .= "$new_text";
    384369    } # foreach field
    385    
     370   
     371    if ($all_metadata_specified) {
     372        my $new_text = "";
     373        my $shortname = "";
     374        my $metadata = $doc_obj->get_all_metadata ($section);
     375        foreach my $pair (@$metadata) {
     376        my ($mfield, $mvalue) = (@$pair);
     377        # no value
     378        next unless defined $mvalue && $mvalue ne "";
     379        # we have already indexed this
     380        next if defined ($specified_fields->{$mfield});
     381        # check fields here, maybe others dont want - change to use dontindex!!
     382        next if ($mfield eq "Identifier" || $mfield eq "classifytype" || $mfield eq "assocfilepath");
     383        next if ($mfield =~ /^gsdl/);
     384       
     385           
     386        if (defined $self->{'indexfieldmap'}->{$mfield}) {
     387            $shortname = $self->{'indexfieldmap'}->{$mfield};
     388        }
     389        else {
     390            $shortname = $self->create_shortname($mfield);
     391            $self->{'indexfieldmap'}->{$mfield} = $shortname;
     392            $self->{'indexfieldmap'}->{$shortname} = 1;
     393        }     
     394        $new_text .= "$paratag<$shortname>$mvalue</$shortname>\n";
     395        if (!defined $self->{'indexfields'}->{$mfield}) {
     396            $self->{'indexfields'}->{$mfield} = 1;
     397        }                   
     398       
     399        }
     400        # filter the text
     401        $new_text = $self->filter_text ("metadata", $new_text);
     402       
     403        $self->{'num_processed_bytes'} += length ($new_text);
     404        $text .= "$new_text";
     405
     406       
     407    }
     408   
    386409    $text .= "$sectionendtag";
    387410    $section = $doc_obj->get_next_section($section);
    388411    } # while defined section
    389412    print $handle "$text\n$documentendtag";
     413    #print STDERR "***********\n$text\n***************\n";
    390414   
    391415}
Note: See TracChangeset for help on using the changeset viewer.