Changeset 14845

Show
Ignore:
Timestamp:
22.11.2007 16:03:51 (12 years ago)
Author:
mdewsnip
Message:

Fixed some efficiency problems with GenericList when collections get large, and renamed a whole bunch of horrible variable names.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/classify/GenericList.pm

    r14173 r14845  
    307307    return; 
    308308    } 
    309     my %OIDtometavalueshash = %{$self->{$metadata_group . ".list"}}; 
    310309 
    311310    # Create a mapping from metadata value to OID 
    312     my %metavaluetoOIDhash = (); 
    313     foreach my $OID (@OIDs) { 
    314     if ($OIDtometavalueshash{$OID}) { 
    315         my @metavalues = @{$OIDtometavalueshash{$OID}}; 
    316         foreach my $metavalue (@metavalues) { 
    317         push(@{$metavaluetoOIDhash{$metavalue}}, $OID); 
    318         } 
    319     } 
    320     } 
    321     # print STDERR "Number of distinct values: " . scalar(keys %metavaluetoOIDhash) . "\n"; 
     311    my $OID_to_metadata_values_hash_ref = $self->{$metadata_group . ".list"}; 
     312    my %metadata_value_to_OIDs_hash = (); 
     313    foreach my $OID (@OIDs) 
     314    { 
     315    if ($OID_to_metadata_values_hash_ref->{$OID}) 
     316    { 
     317        my @metadata_values = @{$OID_to_metadata_values_hash_ref->{$OID}}; 
     318        foreach my $metadata_value (@metadata_values) 
     319        { 
     320        push(@{$metadata_value_to_OIDs_hash{$metadata_value}}, $OID); 
     321        } 
     322    } 
     323    } 
     324    # print STDERR "Number of distinct values: " . scalar(keys %metadata_value_to_OIDs_hash) . "\n"; 
    322325 
    323326    # Partition the values (if necessary) 
     
    325328    if ($partition_type_within_level =~ /^per_letter$/i) { 
    326329    # Generate one hlist for each letter 
    327     my @sortedmetavalues = $self->sort_metadata_values_array(keys(%metavaluetoOIDhash)); 
    328     my %metavaluetoOIDsubhash = (); 
    329  
    330     my $lastpartition = substr($sortedmetavalues[0], 0, 1); 
    331     foreach my $metavalue (@sortedmetavalues) { 
    332         my $metavaluepartition = substr($metavalue, 0, 1); 
     330    my @sortedmetadata_values = $self->sort_metadata_values_array(keys(%metadata_value_to_OIDs_hash)); 
     331    my %metadata_value_to_OIDs_subhash = (); 
     332 
     333    my $lastpartition = substr($sortedmetadata_values[0], 0, 1); 
     334    foreach my $metadata_value (@sortedmetadata_values) { 
     335        my $metadata_valuepartition = substr($metadata_value, 0, 1); 
    333336 
    334337        # Is this the start of a new partition? 
    335         if ($metavaluepartition ne $lastpartition) { 
    336         $self->add_hlist_partition(\@metadata_groups, $classifier_node, $lastpartition, \%metavaluetoOIDsubhash); 
    337         %metavaluetoOIDsubhash = (); 
    338         $lastpartition = $metavaluepartition; 
    339         } 
    340  
    341         $metavaluetoOIDsubhash{$metavalue} = $metavaluetoOIDhash{$metavalue};        
     338        if ($metadata_valuepartition ne $lastpartition) { 
     339        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $lastpartition, \%metadata_value_to_OIDs_subhash); 
     340        %metadata_value_to_OIDs_subhash = (); 
     341        $lastpartition = $metadata_valuepartition; 
     342        } 
     343 
     344        $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash{$metadata_value};        
    342345    } 
    343346 
    344347    # Don't forget to add the last partition 
    345     $self->add_hlist_partition(\@metadata_groups, $classifier_node, $lastpartition, \%metavaluetoOIDsubhash); 
     348    $self->add_hlist_partition(\@metadata_groups, $classifier_node, $lastpartition, \%metadata_value_to_OIDs_subhash); 
    346349 
    347350    # The partitions are stored in an HList 
     
    352355    # Generate hlists of a certain size 
    353356    my $partition_size_within_level = $self->{$metadata_group . ".partition_size_within_level"}; 
    354     if ($partition_type_within_level =~ /^constant_size$/i && scalar(keys %metavaluetoOIDhash) > $partition_size_within_level) { 
    355         my @sortedmetavalues = $self->sort_metadata_values_array(keys(%metavaluetoOIDhash)); 
     357    if ($partition_type_within_level =~ /^constant_size$/i && scalar(keys %metadata_value_to_OIDs_hash) > $partition_size_within_level) { 
     358        my @sortedmetadata_values = $self->sort_metadata_values_array(keys(%metadata_value_to_OIDs_hash)); 
    356359        my $itemsdone = 0; 
    357         my %metavaluetoOIDsubhash = (); 
     360        my %metadata_value_to_OIDs_subhash = (); 
    358361        my $lastpartitionend = ""; 
    359362        my $partitionstart; 
    360         foreach my $metavalue (@sortedmetavalues) { 
    361         $metavaluetoOIDsubhash{$metavalue} = $metavaluetoOIDhash{$metavalue}; 
     363        foreach my $metadata_value (@sortedmetadata_values) { 
     364        $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash{$metadata_value}; 
    362365        $itemsdone++; 
    363         my $itemsinpartition = scalar(keys %metavaluetoOIDsubhash); 
     366        my $itemsinpartition = scalar(keys %metadata_value_to_OIDs_subhash); 
    364367 
    365368        # Is this the start of a new partition? 
    366369        if ($itemsinpartition == 1) { 
    367             $partitionstart = $self->generate_partition_start($metavalue, $lastpartitionend, $self->{"partition_name_length"}); 
     370            $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $self->{"partition_name_length"}); 
    368371        } 
    369372 
    370373        # Is this the end of the partition? 
    371         if ($itemsinpartition == $partition_size_within_level || $itemsdone == @sortedmetavalues) { 
    372             my $partitionend = $self->generate_partition_end($metavalue, $partitionstart, $self->{"partition_name_length"}); 
     374        if ($itemsinpartition == $partition_size_within_level || $itemsdone == @sortedmetadata_values) { 
     375            my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $self->{"partition_name_length"}); 
    373376            my $partitionname = $partitionstart; 
    374377            if ($partitionend ne $partitionstart) { 
     
    376379            } 
    377380 
    378             $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metavaluetoOIDsubhash); 
    379             %metavaluetoOIDsubhash = (); 
     381            $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 
     382            %metadata_value_to_OIDs_subhash = (); 
    380383            $lastpartitionend = $partitionend; 
    381384        } 
     
    388391    # Otherwise just add all the values to a VList 
    389392    else { 
    390         $self->add_vlist(\@metadata_groups, $classifier_node, \%metavaluetoOIDhash); 
     393        $self->add_vlist(\@metadata_groups, $classifier_node, \%metadata_value_to_OIDs_hash); 
    391394    } 
    392395    } 
     
    423426{ 
    424427    my $self = shift(@_); 
    425     my $metavalue = shift(@_); 
     428    my $metadata_value = shift(@_); 
    426429    my $lastpartitionend = shift(@_); 
    427430    my $partition_name_length = shift(@_); 
    428431 
    429432    if ($partition_name_length) { 
    430     return substr($metavalue, 0, $partition_name_length); 
    431     } 
    432  
    433     my $partitionstart = substr($metavalue, 0, 1); 
     433    return substr($metadata_value, 0, $partition_name_length); 
     434    } 
     435 
     436    my $partitionstart = substr($metadata_value, 0, 1); 
    434437    if ($partitionstart le $lastpartitionend) { 
    435     $partitionstart = substr($metavalue, 0, 2); 
     438    $partitionstart = substr($metadata_value, 0, 2); 
    436439    # Give up after three characters 
    437440    if ($partitionstart le $lastpartitionend) { 
    438         $partitionstart = substr($metavalue, 0, 3); 
     441        $partitionstart = substr($metadata_value, 0, 3); 
    439442    } 
    440443    } 
     
    447450{ 
    448451    my $self = shift(@_); 
    449     my $metavalue = shift(@_); 
     452    my $metadata_value = shift(@_); 
    450453    my $partitionstart = shift(@_); 
    451454    my $partition_name_length = shift(@_); 
    452455 
    453456    if ($partition_name_length) { 
    454     return substr($metavalue, 0, $partition_name_length); 
    455     } 
    456  
    457     my $partitionend = substr($metavalue, 0, length($partitionstart)); 
     457    return substr($metadata_value, 0, $partition_name_length); 
     458    } 
     459 
     460    my $partitionend = substr($metadata_value, 0, length($partitionstart)); 
    458461    if ($partitionend gt $partitionstart) { 
    459     $partitionend = substr($metavalue, 0, 1); 
     462    $partitionend = substr($metadata_value, 0, 1); 
    460463    if ($partitionend le $partitionstart) { 
    461         $partitionend = substr($metavalue, 0, 2); 
     464        $partitionend = substr($metadata_value, 0, 2); 
    462465        # Give up after three characters 
    463466        if ($partitionend le $partitionstart) { 
    464         $partitionend = substr($metavalue, 0, 3); 
     467        $partitionend = substr($metadata_value, 0, 3); 
    465468        } 
    466469    } 
     
    477480    my $classifier_node = shift(@_); 
    478481    my $partitionname = shift(@_); 
    479     my %metavaluetoOIDhash = %{shift(@_)}; 
     482    my $metadata_value_to_OIDs_hash_ref = shift(@_); 
    480483 
    481484    # Create an hlist partition 
     
    485488 
    486489    # Add the children to the hlist partition 
    487     $self->add_vlist(\@metadata_groups, \%child_classifier_node, \%metavaluetoOIDhash); 
     490    $self->add_vlist(\@metadata_groups, \%child_classifier_node, $metadata_value_to_OIDs_hash_ref); 
    488491    push(@{$classifier_node->{'contains'}}, \%child_classifier_node); 
    489492} 
     
    495498    my @metadata_groups = @{shift(@_)}; 
    496499    my $classifier_node = shift(@_); 
    497     my %metavaluetoOIDhash = %{shift(@_)}; 
     500    my $metadata_value_to_OIDs_hash_ref = shift(@_); 
    498501 
    499502    my $metadata_group = shift(@metadata_groups); 
     
    501504 
    502505    # Create an entry in the vlist for each value 
    503     foreach my $metavalue ($self->sort_metadata_values_array(keys(%metavaluetoOIDhash))) { 
    504     my @OIDs = @{$metavaluetoOIDhash{$metavalue}}; 
     506    foreach my $metadata_value ($self->sort_metadata_values_array(keys(%{$metadata_value_to_OIDs_hash_ref}))) 
     507    { 
     508    my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}; 
    505509 
    506510    # If there is only one item and 'always_bookshelf' is false, add the item to the list 
     
    510514        # Find the offset of this metadata value 
    511515        my $offset = 0; 
    512         my %OIDtometavalueshash = %{$self->{$metadata_group . ".list"}}; 
    513         my @metavalues = @{$OIDtometavalueshash{$OID}}; 
    514         for (my $i = 0; $i < scalar(@metavalues); $i++) { 
    515         if ($metavalue eq $metavalues[$i]) { 
     516        my $OID_to_metadata_values_hash_ref = $self->{$metadata_group . ".list"}; 
     517        my @metadata_values = @{$OID_to_metadata_values_hash_ref->{$OID}}; 
     518        for (my $i = 0; $i < scalar(@metadata_values); $i++) { 
     519        if ($metadata_value eq $metadata_values[$i]) { 
    516520            $offset = $i; 
    517521            last; 
     
    523527    # Otherwise create a sublist (bookshelf) for the metadata value 
    524528    else { 
    525         my %child_classifier_node = ( 'Title' => $self->convert_unicode_string_to_utf8_string($metavalue), 
     529        my %child_classifier_node = ( 'Title' => $self->convert_unicode_string_to_utf8_string($metadata_value), 
    526530                      'childtype' => "VList", 
    527531                      'contains' => [] ); 
     
    538542        my @sort_leaf_nodes_usingmetadata_groups = @{$self->{'sort_leaf_nodes_using_metadata_groups'}}; 
    539543        foreach my $sort_leaf_nodes_usingmetaelem (reverse @sort_leaf_nodes_usingmetadata_groups) { 
    540             my %OIDtometavalueshash = %{$self->{$sort_leaf_nodes_usingmetaelem . ".list"}}; 
     544            my $OID_to_metadata_values_hash_ref = $self->{$sort_leaf_nodes_usingmetaelem . ".list"}; 
    541545            # Force a stable sort (Perl 5.6's sort isn't stable) 
    542546            # !! The [0] bits aren't ideal (multiple metadata values) !!  
    543             @OIDs = @OIDs[ sort { $OIDtometavalueshash{$OIDs[$a]}[0] cmp $OIDtometavalueshash{$OIDs[$b]}[0] || $a <=> $b; } 0..$#OIDs ]; 
     547            @OIDs = @OIDs[ sort { $OID_to_metadata_values_hash_ref->{$OIDs[$a]}[0] cmp $OID_to_metadata_values_hash_ref->{$OIDs[$b]}[0] || $a <=> $b; } 0..$#OIDs ]; 
    544548        } 
    545549