Changeset 23154

Show
Ignore:
Timestamp:
18.10.2010 13:50:10 (9 years ago)
Author:
kjdon
Message:

Store a hash of all document OIDs and check against it whenever the classifier is asked to classify something, so that nothing is classified twice; classifying a document twice leads to the document appearing in the list 4 times.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/List.pm

    r23116 r23154  
    273273    } 
    274274 
    275     # An empty array for the document OIDs 
     275    # An empty array for the document/section OIDs that we are classifying 
    276276    $self->{'OIDs'} = []; 
    277  
     277    # A hash for all the doc ids that we have seen, so we don't classify something twice 
     278    $self->{'all_doc_OIDs'} = {}; 
    278279    return bless $self, $class; 
    279280} 
     
    292293    my ($doc_obj) = @_; 
    293294 
     295    if (defined $self->{'all_doc_OIDs'}->{$doc_obj->get_OID()}) { 
     296    print STDERR "Warning, List classifier has already seen document ".$doc_obj->get_OID().", not classifying again\n"; 
     297    return; 
     298    } 
     299    $self->{'all_doc_OIDs'}->{$doc_obj->get_OID()} = 1; 
    294300    # If "-classify_sections" is set, classify every section of the document 
    295301    if ($self->{'classify_sections'}) { 
     
    304310    $self->classify_section($doc_obj, $doc_obj->get_OID(), $doc_obj->get_top_section()); 
    305311    } 
    306 } 
    307  
     312     
     313} 
    308314 
    309315sub classify_section 
     
    344350    
    345351    # Otherwise, include this section in the classifier 
     352     
    346353    push(@{$self->{'OIDs'}}, $section_OID); 
    347354 
     
    419426    my @OIDs = @{shift(@_)}; 
    420427    my $classifier_node = shift(@_); 
    421     # print STDERR "\nAdding AZ list for " . $classifier_node->{'Title'} . "\n"; 
    422  
     428     
    423429    my $metadata_group = $metadata_groups[0]; 
    424     # print STDERR "Processing metadata group: " . $metadata_group . "\n"; 
    425     # print STDERR "Number of OID values: " . @OIDs . "\n"; 
    426  
     430     
    427431    if (!defined($self->{$metadata_group . ".list"})) { 
    428432    print STDERR "Warning: No metadata values assigned to $metadata_group.\n"; 
     
    444448    } 
    445449    } 
    446     # print STDERR "Number of distinct values: " . scalar(keys %metadata_value_to_OIDs_hash) . "\n"; 
     450     #print STDERR "Number of distinct values: " . scalar(keys %metadata_value_to_OIDs_hash) . "\n"; 
    447451 
    448452    # Partition the values (if necessary) 
     
    778782    { 
    779783    my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}; 
    780  
    781784    # If there is only one item and 'bookshelf_type' is not always (ie. never or duplicate_only), add the item to the list 
    782785    if (@OIDs == 1 && $self->{$metadata_group . ".bookshelf_type"} ne "always") {