Ignore:
Timestamp:
2007-11-22T16:03:51+13:00 (16 years ago)
Author:
mdewsnip
Message:

Fixed some efficiency problems with GenericList when collections get large, and renamed a whole bunch of horrible variable names.

File:
1 edited

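Legend:

Unmodified — line prefixed with both line numbers run together (old r14173 number immediately followed by the new r14845 number, e.g. "307307" = old line 307, new line 307)
Added — line prefixed with only the new (r14845) line number, indented one extra column
Removed — line prefixed with only the old (r14173) line number
(A line containing only whitespace between numbered lines marks a gap where unchanged lines were skipped.)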
  • gsdl/trunk/perllib/classify/GenericList.pm

    r14173 r14845  
    307307    return;
    308308    }
    309     my %OIDtometavalueshash = %{$self->{$metadata_group . ".list"}};
    310309
    311310    # Create a mapping from metadata value to OID
    312     my %metavaluetoOIDhash = ();
    313     foreach my $OID (@OIDs) {
    314     if ($OIDtometavalueshash{$OID}) {
    315         my @metavalues = @{$OIDtometavalueshash{$OID}};
    316         foreach my $metavalue (@metavalues) {
    317         push(@{$metavaluetoOIDhash{$metavalue}}, $OID);
    318         }
    319     }
    320     }
    321     # print STDERR "Number of distinct values: " . scalar(keys %metavaluetoOIDhash) . "\n";
     311    my $OID_to_metadata_values_hash_ref = $self->{$metadata_group . ".list"};
     312    my %metadata_value_to_OIDs_hash = ();
     313    foreach my $OID (@OIDs)
     314    {
     315    if ($OID_to_metadata_values_hash_ref->{$OID})
     316    {
     317        my @metadata_values = @{$OID_to_metadata_values_hash_ref->{$OID}};
     318        foreach my $metadata_value (@metadata_values)
     319        {
     320        push(@{$metadata_value_to_OIDs_hash{$metadata_value}}, $OID);
     321        }
     322    }
     323    }
     324    # print STDERR "Number of distinct values: " . scalar(keys %metadata_value_to_OIDs_hash) . "\n";
    322325
    323326    # Partition the values (if necessary)
     
    325328    if ($partition_type_within_level =~ /^per_letter$/i) {
    326329    # Generate one hlist for each letter
    327     my @sortedmetavalues = $self->sort_metadata_values_array(keys(%metavaluetoOIDhash));
    328     my %metavaluetoOIDsubhash = ();
    329 
    330     my $lastpartition = substr($sortedmetavalues[0], 0, 1);
    331     foreach my $metavalue (@sortedmetavalues) {
    332         my $metavaluepartition = substr($metavalue, 0, 1);
     330    my @sortedmetadata_values = $self->sort_metadata_values_array(keys(%metadata_value_to_OIDs_hash));
     331    my %metadata_value_to_OIDs_subhash = ();
     332
     333    my $lastpartition = substr($sortedmetadata_values[0], 0, 1);
     334    foreach my $metadata_value (@sortedmetadata_values) {
     335        my $metadata_valuepartition = substr($metadata_value, 0, 1);
    333336
    334337        # Is this the start of a new partition?
    335         if ($metavaluepartition ne $lastpartition) {
    336         $self->add_hlist_partition(\@metadata_groups, $classifier_node, $lastpartition, \%metavaluetoOIDsubhash);
    337         %metavaluetoOIDsubhash = ();
    338         $lastpartition = $metavaluepartition;
    339         }
    340 
    341         $metavaluetoOIDsubhash{$metavalue} = $metavaluetoOIDhash{$metavalue};       
     338        if ($metadata_valuepartition ne $lastpartition) {
     339        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $lastpartition, \%metadata_value_to_OIDs_subhash);
     340        %metadata_value_to_OIDs_subhash = ();
     341        $lastpartition = $metadata_valuepartition;
     342        }
     343
     344        $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash{$metadata_value};       
    342345    }
    343346
    344347    # Don't forget to add the last partition
    345     $self->add_hlist_partition(\@metadata_groups, $classifier_node, $lastpartition, \%metavaluetoOIDsubhash);
     348    $self->add_hlist_partition(\@metadata_groups, $classifier_node, $lastpartition, \%metadata_value_to_OIDs_subhash);
    346349
    347350    # The partitions are stored in an HList
     
    352355    # Generate hlists of a certain size
    353356    my $partition_size_within_level = $self->{$metadata_group . ".partition_size_within_level"};
    354     if ($partition_type_within_level =~ /^constant_size$/i && scalar(keys %metavaluetoOIDhash) > $partition_size_within_level) {
    355         my @sortedmetavalues = $self->sort_metadata_values_array(keys(%metavaluetoOIDhash));
     357    if ($partition_type_within_level =~ /^constant_size$/i && scalar(keys %metadata_value_to_OIDs_hash) > $partition_size_within_level) {
     358        my @sortedmetadata_values = $self->sort_metadata_values_array(keys(%metadata_value_to_OIDs_hash));
    356359        my $itemsdone = 0;
    357         my %metavaluetoOIDsubhash = ();
     360        my %metadata_value_to_OIDs_subhash = ();
    358361        my $lastpartitionend = "";
    359362        my $partitionstart;
    360         foreach my $metavalue (@sortedmetavalues) {
    361         $metavaluetoOIDsubhash{$metavalue} = $metavaluetoOIDhash{$metavalue};
     363        foreach my $metadata_value (@sortedmetadata_values) {
     364        $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash{$metadata_value};
    362365        $itemsdone++;
    363         my $itemsinpartition = scalar(keys %metavaluetoOIDsubhash);
     366        my $itemsinpartition = scalar(keys %metadata_value_to_OIDs_subhash);
    364367
    365368        # Is this the start of a new partition?
    366369        if ($itemsinpartition == 1) {
    367             $partitionstart = $self->generate_partition_start($metavalue, $lastpartitionend, $self->{"partition_name_length"});
     370            $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $self->{"partition_name_length"});
    368371        }
    369372
    370373        # Is this the end of the partition?
    371         if ($itemsinpartition == $partition_size_within_level || $itemsdone == @sortedmetavalues) {
    372             my $partitionend = $self->generate_partition_end($metavalue, $partitionstart, $self->{"partition_name_length"});
     374        if ($itemsinpartition == $partition_size_within_level || $itemsdone == @sortedmetadata_values) {
     375            my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $self->{"partition_name_length"});
    373376            my $partitionname = $partitionstart;
    374377            if ($partitionend ne $partitionstart) {
     
    376379            }
    377380
    378             $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metavaluetoOIDsubhash);
    379             %metavaluetoOIDsubhash = ();
     381            $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash);
     382            %metadata_value_to_OIDs_subhash = ();
    380383            $lastpartitionend = $partitionend;
    381384        }
     
    388391    # Otherwise just add all the values to a VList
    389392    else {
    390         $self->add_vlist(\@metadata_groups, $classifier_node, \%metavaluetoOIDhash);
     393        $self->add_vlist(\@metadata_groups, $classifier_node, \%metadata_value_to_OIDs_hash);
    391394    }
    392395    }
     
    423426{
    424427    my $self = shift(@_);
    425     my $metavalue = shift(@_);
     428    my $metadata_value = shift(@_);
    426429    my $lastpartitionend = shift(@_);
    427430    my $partition_name_length = shift(@_);
    428431
    429432    if ($partition_name_length) {
    430     return substr($metavalue, 0, $partition_name_length);
    431     }
    432 
    433     my $partitionstart = substr($metavalue, 0, 1);
     433    return substr($metadata_value, 0, $partition_name_length);
     434    }
     435
     436    my $partitionstart = substr($metadata_value, 0, 1);
    434437    if ($partitionstart le $lastpartitionend) {
    435     $partitionstart = substr($metavalue, 0, 2);
     438    $partitionstart = substr($metadata_value, 0, 2);
    436439    # Give up after three characters
    437440    if ($partitionstart le $lastpartitionend) {
    438         $partitionstart = substr($metavalue, 0, 3);
     441        $partitionstart = substr($metadata_value, 0, 3);
    439442    }
    440443    }
     
    447450{
    448451    my $self = shift(@_);
    449     my $metavalue = shift(@_);
     452    my $metadata_value = shift(@_);
    450453    my $partitionstart = shift(@_);
    451454    my $partition_name_length = shift(@_);
    452455
    453456    if ($partition_name_length) {
    454     return substr($metavalue, 0, $partition_name_length);
    455     }
    456 
    457     my $partitionend = substr($metavalue, 0, length($partitionstart));
     457    return substr($metadata_value, 0, $partition_name_length);
     458    }
     459
     460    my $partitionend = substr($metadata_value, 0, length($partitionstart));
    458461    if ($partitionend gt $partitionstart) {
    459     $partitionend = substr($metavalue, 0, 1);
     462    $partitionend = substr($metadata_value, 0, 1);
    460463    if ($partitionend le $partitionstart) {
    461         $partitionend = substr($metavalue, 0, 2);
     464        $partitionend = substr($metadata_value, 0, 2);
    462465        # Give up after three characters
    463466        if ($partitionend le $partitionstart) {
    464         $partitionend = substr($metavalue, 0, 3);
     467        $partitionend = substr($metadata_value, 0, 3);
    465468        }
    466469    }
     
    477480    my $classifier_node = shift(@_);
    478481    my $partitionname = shift(@_);
    479     my %metavaluetoOIDhash = %{shift(@_)};
     482    my $metadata_value_to_OIDs_hash_ref = shift(@_);
    480483
    481484    # Create an hlist partition
     
    485488
    486489    # Add the children to the hlist partition
    487     $self->add_vlist(\@metadata_groups, \%child_classifier_node, \%metavaluetoOIDhash);
     490    $self->add_vlist(\@metadata_groups, \%child_classifier_node, $metadata_value_to_OIDs_hash_ref);
    488491    push(@{$classifier_node->{'contains'}}, \%child_classifier_node);
    489492}
     
    495498    my @metadata_groups = @{shift(@_)};
    496499    my $classifier_node = shift(@_);
    497     my %metavaluetoOIDhash = %{shift(@_)};
     500    my $metadata_value_to_OIDs_hash_ref = shift(@_);
    498501
    499502    my $metadata_group = shift(@metadata_groups);
     
    501504
    502505    # Create an entry in the vlist for each value
    503     foreach my $metavalue ($self->sort_metadata_values_array(keys(%metavaluetoOIDhash))) {
    504     my @OIDs = @{$metavaluetoOIDhash{$metavalue}};
     506    foreach my $metadata_value ($self->sort_metadata_values_array(keys(%{$metadata_value_to_OIDs_hash_ref})))
     507    {
     508    my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}};
    505509
    506510    # If there is only one item and 'always_bookshelf' is false, add the item to the list
     
    510514        # Find the offset of this metadata value
    511515        my $offset = 0;
    512         my %OIDtometavalueshash = %{$self->{$metadata_group . ".list"}};
    513         my @metavalues = @{$OIDtometavalueshash{$OID}};
    514         for (my $i = 0; $i < scalar(@metavalues); $i++) {
    515         if ($metavalue eq $metavalues[$i]) {
     516        my $OID_to_metadata_values_hash_ref = $self->{$metadata_group . ".list"};
     517        my @metadata_values = @{$OID_to_metadata_values_hash_ref->{$OID}};
     518        for (my $i = 0; $i < scalar(@metadata_values); $i++) {
     519        if ($metadata_value eq $metadata_values[$i]) {
    516520            $offset = $i;
    517521            last;
     
    523527    # Otherwise create a sublist (bookshelf) for the metadata value
    524528    else {
    525         my %child_classifier_node = ( 'Title' => $self->convert_unicode_string_to_utf8_string($metavalue),
     529        my %child_classifier_node = ( 'Title' => $self->convert_unicode_string_to_utf8_string($metadata_value),
    526530                      'childtype' => "VList",
    527531                      'contains' => [] );
     
    538542        my @sort_leaf_nodes_usingmetadata_groups = @{$self->{'sort_leaf_nodes_using_metadata_groups'}};
    539543        foreach my $sort_leaf_nodes_usingmetaelem (reverse @sort_leaf_nodes_usingmetadata_groups) {
    540             my %OIDtometavalueshash = %{$self->{$sort_leaf_nodes_usingmetaelem . ".list"}};
     544            my $OID_to_metadata_values_hash_ref = $self->{$sort_leaf_nodes_usingmetaelem . ".list"};
    541545            # Force a stable sort (Perl 5.6's sort isn't stable)
    542546            # !! The [0] bits aren't ideal (multiple metadata values) !!
    543             @OIDs = @OIDs[ sort { $OIDtometavalueshash{$OIDs[$a]}[0] cmp $OIDtometavalueshash{$OIDs[$b]}[0] || $a <=> $b; } 0..$#OIDs ];
     547            @OIDs = @OIDs[ sort { $OID_to_metadata_values_hash_ref->{$OIDs[$a]}[0] cmp $OID_to_metadata_values_hash_ref->{$OIDs[$b]}[0] || $a <=> $b; } 0..$#OIDs ];
    544548        }
    545549
Note: See TracChangeset for help on using the changeset viewer.