Changeset 23154


Ignore:
Timestamp:
2010-10-18T13:50:10+13:00 (14 years ago)
Author:
kjdon
Message:

Store a hash of all document OIDs and check against it whenever we are asked to classify a document, so that nothing is classified twice; classifying a document twice led to it appearing in the list 4 times.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/List.pm

    r23116 r23154  
    273273    }
    274274
    275     # An empty array for the document OIDs
     275    # An empty array for the document/section OIDs that we are classifying
    276276    $self->{'OIDs'} = [];
    277 
     277    # A hash for all the doc ids that we have seen, so we don't classify something twice
     278    $self->{'all_doc_OIDs'} = {};
    278279    return bless $self, $class;
    279280}
     
    292293    my ($doc_obj) = @_;
    293294
     295    if (defined $self->{'all_doc_OIDs'}->{$doc_obj->get_OID()}) {
     296    print STDERR "Warning, List classifier has already seen document ".$doc_obj->get_OID().", not classifying again\n";
     297    return;
     298    }
     299    $self->{'all_doc_OIDs'}->{$doc_obj->get_OID()} = 1;
    294300    # If "-classify_sections" is set, classify every section of the document
    295301    if ($self->{'classify_sections'}) {
     
    304310    $self->classify_section($doc_obj, $doc_obj->get_OID(), $doc_obj->get_top_section());
    305311    }
    306 }
    307 
     312   
     313}
    308314
    309315sub classify_section
     
    344350   
    345351    # Otherwise, include this section in the classifier
     352   
    346353    push(@{$self->{'OIDs'}}, $section_OID);
    347354
     
    419426    my @OIDs = @{shift(@_)};
    420427    my $classifier_node = shift(@_);
    421     # print STDERR "\nAdding AZ list for " . $classifier_node->{'Title'} . "\n";
    422 
     428   
    423429    my $metadata_group = $metadata_groups[0];
    424     # print STDERR "Processing metadata group: " . $metadata_group . "\n";
    425     # print STDERR "Number of OID values: " . @OIDs . "\n";
    426 
     430   
    427431    if (!defined($self->{$metadata_group . ".list"})) {
    428432    print STDERR "Warning: No metadata values assigned to $metadata_group.\n";
     
    444448    }
    445449    }
    446     # print STDERR "Number of distinct values: " . scalar(keys %metadata_value_to_OIDs_hash) . "\n";
     450     #print STDERR "Number of distinct values: " . scalar(keys %metadata_value_to_OIDs_hash) . "\n";
    447451
    448452    # Partition the values (if necessary)
     
    778782    {
    779783    my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}};
    780 
    781784    # If there is only one item and 'bookshelf_type' is not always (ie. never or duplicate_only), add the item to the list
    782785    if (@OIDs == 1 && $self->{$metadata_group . ".bookshelf_type"} ne "always") {
Note: See TracChangeset for help on using the changeset viewer.