Changeset 33463

Show
Ignore:
Timestamp:
12.09.2019 14:17:11 (6 days ago)
Author:
kjdon
Message:

Fixed up some typos. Removed the use_hlist_for option: it is very hard to understand and actually only works if partition_type is none and bookshelf_type is always, in which case it puts the bookshelves into an hlist instead of a vlist. Instead, I have added an all_values partition type, which just makes each value become its own partition. This might be useful for dates or other short classification IDs.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/List.pm

    r33460 r33463  
    314314    # Default: duplicate_only, ie. leave leaf nodes ungrouped (equivalent to AZCompactList -mingroup 2) 
    315315    $self->{$last_metadata_group . ".bookshelf_type"} = $self->{'bookshelf_type'}; 
    316          
    317     # Whether to use an hlist or a vlist for each level in the hierarchy (default: vlist) 
    318     foreach my $metadata_group (@metadata_groups) { 
    319     $self->{$metadata_group . ".list_type"} = "VList"; 
    320     } 
    321     foreach my $metadata_group (split(/\,/, $self->{'use_hlist_for'})) { 
    322     $self->{$metadata_group . ".list_type"} = "HList"; 
    323     } 
    324316 
    325317    # How the items are grouped into partitions (default: by letter)  
    326318    # for each level (metadata group), separated by '/' 
    327  
    328319    $self->set_metadata_groups_info_per_level("partition_type_within_level", $partition_type_default, $valid_partition_types); 
    329  
    330320    $self->set_metadata_groups_info_per_level("numeric_partition_type_within_level", $numeric_partition_type_default, $valid_numeric_partition_types); 
    331321 
     
    425415    # A hash for all the doc ids that we have seen, so we don't classify something twice 
    426416    $self->{'all_doc_OIDs'} = {}; 
    427     #return bless $self, $class; 
    428417    return $self; 
    429418} 
     
    443432 
    444433    if (!defined $self->{$info_name}) { 
    445     print STDERR "**** undefined $info_name\n"; 
     434    print STDERR "List Error: no values were set for option $info_name\n"; 
    446435    } 
    447436    my @info_list = split(/\//, $self->{$info_name}); 
     
    581570            my $lc_metadata_value = lc($metadata_value); 
    582571            $lc_metadata_value  = &sorttools::format_metadata_for_sorting($real_metadata_element, $lc_metadata_value, $doc_obj) unless $self->{'no_metadata_formatting'}; 
    583             #print STDERR "formatted value = $lc_metadata_value\n"; 
    584572     
    585573            # Add the metadata value into the list for this combination of metadata group  
     
    623611    # Create the root node of the classification hierarchy 
    624612    my %classifier_node = ( 'thistype' => "Invisible", 
    625                 'childtype' => $self->{$first_metadata_group . ".list_type"}, 
     613                #'childtype' => $self->{$first_metadata_group . ".list_type"}, 
    626614                'Title' => $self->{'buttonname'}, 
    627615                'contains' => [], 
     
    695683    }  
    696684    elsif ($partition_type_within_level =~ /^constant_size$/i && scalar(keys %metadata_value_to_OIDs_hash) > $partition_size_within_level) { 
    697         $self->split_constant_size(\@metadata_groups, $classifier_node, $partition_size_within_level, $partition_sort_mode_within_level, \%metadata_value_to_OIDs_hash, $self->{'partition_name_length'}); 
     685        $self->split_constant_size(\@metadata_groups, $classifier_node, $partition_size_within_level, $partition_sort_mode_within_level, $bookshelf_type_within_level, \%metadata_value_to_OIDs_hash, $self->{'partition_name_length'}); 
    698686    } 
    699687         
     
    743731    # Generate hlists of a certain size 
    744732     
    745     $self->split_constant_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1); 
     733    $self->split_constant_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $bookshelf_type_within_level, $numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1); 
    746734    } elsif ($numeric_partition_type_within_level eq "approximate_size" && scalar(keys %$numeric_metadata_value_to_OIDs_hash_ref) > $numeric_partition_size_within_level) { 
    747735    $self->split_approximate_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $bookshelf_type_within_level, $numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1); 
     
    818806    for (my $i = 0; $i < scalar(@partition_buckets) - 1; $i++) { 
    819807     
    820     my $this_bucket = $partition_buckets[$i]; #->{'name'}; 
    821     my $next_bucket = $partition_buckets[$i+1]; #->{'name'}; 
     808    my $this_bucket = $partition_buckets[$i];  
     809    my $next_bucket = $partition_buckets[$i+1];  
    822810             
    823811    my $items_in_partition = $this_bucket->{'size'};                 
     
    848836    my $last_partition_end = ""; 
    849837    my $partition_start = ""; 
     838    my $partition_end = ""; 
     839    my $partition_name = ""; 
    850840    foreach my $partition (@new_partition_buckets) { 
    851841    my @metadata_values = $self->sort_metadata_values_array($sort_mode, @{$partition->{'metadata_values'}}); 
     
    853843    $partition_start = $self->generate_partition_start($metadata_values[0], $last_partition_end, $partition_name_length, $is_numeric); 
    854844     
    855     if ($items_in_partition > $partition_size) { 
     845    if ($items_in_partition <= $partition_size) { 
     846        # we can just add the partition as is 
     847        my %metadata_values_to_OIDs_subhashes = (); 
     848        for (my $i = 0; $i < scalar(@metadata_values); $i++) { 
     849        my $metadata_value = $metadata_values[$i]; 
     850        $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};           
     851        } 
     852        my $last_metadata_value = $metadata_values[scalar(@metadata_values)-1]; 
     853        $partition_end = $self->generate_partition_end($last_metadata_value, $partition_start, $partition_name_length, $is_numeric); 
     854        $partition_name = $partition_start; 
     855        if ($partition_end ne $partition_start) { 
     856        $partition_name = $partition_name . "-" . $partition_end; 
     857        } 
     858        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes);           
     859        $last_partition_end = $partition_end;        
     860    } else { 
     861        # we have too many items, need to split the partition 
    856862        my $items_done = 0; 
    857863        my %metadata_values_to_OIDs_subhashes = (); 
     
    861867        my $items_for_this_md_value = $bookshelf_type eq "never" ? scalar(@{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}) : 1; 
    862868         
    863         my $partitionend = $self->generate_partition_end($metadata_value, $partition_start, $partition_name_length, $is_numeric); 
    864         my $partitionname = $partition_start; 
    865         if ($partitionend ne $partition_start) { 
    866             $partitionname = $partitionname . "-" . $partitionend; 
    867         } 
    868          
    869869        if ($items_done + $items_for_this_md_value > $partition_size && $items_done != 0) { 
    870             # Start a new partition 
    871             $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_values_to_OIDs_subhashes); 
    872             $last_partition_end = $partitionend;             
     870            # Save the stored items into a partition 
     871            $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes); 
     872            $last_partition_end = $partition_end;            
    873873            $partition_start = $self->generate_partition_start($metadata_value, $last_partition_end, $partition_name_length, $is_numeric); 
    874874            $items_done = 0; 
     
    878878        # If bookshelf_type is "never" and the current metadata value holds too many items, need to split into several partitions 
    879879        if ($bookshelf_type eq "never" && $items_for_this_md_value > $partition_size) { 
    880              
     880 
    881881            my $partitionname_for_this_value = $self->generate_partition_start($metadata_value, $last_partition_end, $partition_name_length, $is_numeric); 
    882882            # Get the number of partitions needed for this value 
    883883            my $num_splits = int($items_for_this_md_value / $partition_size); 
    884884            $num_splits++ if ($items_for_this_md_value / $partition_size > $num_splits); 
    885              
    886             my @OIDs_for_this_value = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 
     885            my @OIDs_for_this_value = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}; 
    887886            for (my $i = 0; $i < $num_splits; $i++) { 
    888887            my %OIDs_subhashes_for_this_value = (); 
     
    894893            # The last bucket might have only a few items and need to be merged with buckets for subsequent metadata values  
    895894            if ($i == $num_splits - 1 && scalar(@OIDs_for_this_partition) < $partition_size) { 
     895                $partition_start = $partitionname_for_this_value; 
     896                $partition_name = $partition_start; 
    896897                $metadata_values_to_OIDs_subhashes{$metadata_value} = \@OIDs_for_this_partition; 
    897898                $items_done += scalar(@OIDs_for_this_partition); 
    898                 next; 
     899                $last_partition_end = $partitionname_for_this_value 
     900            } else { 
     901             
     902                # Add an HList for this bucket 
     903                $OIDs_subhashes_for_this_value{$metadata_value} = \@OIDs_for_this_partition; 
     904                $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname_for_this_value, \%OIDs_subhashes_for_this_value); 
     905                $last_partition_end = $partitionname_for_this_value; 
    899906            } 
    900              
    901             # Add an HList for this bucket 
    902             $OIDs_subhashes_for_this_value{$metadata_value} = \@OIDs_for_this_partition; 
    903             $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname_for_this_value, \%OIDs_subhashes_for_this_value); 
    904             $last_partition_end = $partitionname_for_this_value; 
    905907            } 
    906             next; 
     908        } else { 
     909                     
     910            $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 
     911            $items_done += $bookshelf_type eq "never" ? scalar(@{$metadata_values_to_OIDs_subhashes{$metadata_value}}) : 1; 
     912             $partition_end = $self->generate_partition_end($metadata_value, $partition_start, $partition_name_length, $is_numeric); 
     913             $partition_name = $partition_start; 
     914            if ($partition_end ne $partition_start) { 
     915            $partition_name = $partition_name . "-" . $partition_end; 
     916            } 
     917             
    907918        } 
    908                          
    909         $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 
    910         $items_done += $bookshelf_type eq "never" ? scalar(@{$metadata_values_to_OIDs_subhashes{$metadata_value}}) : 1;          
    911919         
    912920        # The last partition 
    913         if($i == scalar(@metadata_values) - 1) { 
    914             $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_values_to_OIDs_subhashes); 
     921        if($i == scalar(@metadata_values) - 1 && $items_done >0) { 
     922            $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes); 
    915923        } 
    916         }        
     924         
     925        } 
    917926    } # end if items in partition > partition size 
    918     else { 
    919         # The easier case, just add a partition 
    920         my %metadata_values_to_OIDs_subhashes = (); 
    921         for (my $i = 0; $i < scalar(@metadata_values); $i++) { 
    922         my $metadata_value = $metadata_values[$i]; 
    923         $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};           
    924         } 
    925         my $last_metadata_value = $metadata_values[scalar(@metadata_values)-1]; 
    926         my $partitionend = $self->generate_partition_end($last_metadata_value, $partition_start, $partition_name_length, $is_numeric); 
    927         my $partitionname = $partition_start; 
    928         if ($partitionend ne $partition_start) { 
    929         $partitionname = $partitionname . "-" . $partitionend; 
    930         } 
    931      
    932         $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_values_to_OIDs_subhashes);            
    933         $last_partition_end = $partitionend;         
    934     } 
     927 
    935928    }    
    936929     
     
    947940    my $partition_size = shift(@_); 
    948941    my $sort_mode = shift(@_); 
     942    my $bookshelf_type = shift(@_);  
    949943    my $metadata_value_to_OIDs_hash_ref = shift(@_); 
    950944    my $partition_name_length = shift(@_); 
     
    952946 
    953947    my @sortedmetadata_values = $self->sort_metadata_values_array($sort_mode, keys(%$metadata_value_to_OIDs_hash_ref)); 
    954     my $itemsdone = 0; 
    955     my $numitems = scalar(@sortedmetadata_values); 
     948    my $items_in_partition = 0; 
    956949    my %metadata_value_to_OIDs_subhash = (); 
    957950    my $lastpartitionend = ""; 
    958951    my $partitionstart; 
     952     
    959953    foreach my $metadata_value (@sortedmetadata_values) { 
    960     $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 
    961     $itemsdone++; 
    962     my $itemsinpartition = scalar(keys %metadata_value_to_OIDs_subhash); 
    963  
    964     # Is this the start of a new partition? 
    965     if ($itemsinpartition == 1) { 
     954    if ($items_in_partition == 0) { 
     955        # a new partition, set the name 
    966956        $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric); 
    967957    } 
    968  
    969     # Is this the end of the partition? 
    970     if ($itemsinpartition == $partition_size || $itemsdone == $numitems) { 
     958    my $numitems_for_this_value = ($bookshelf_type eq "never" ? scalar(@{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}) : 1); 
     959    if ($items_in_partition + $numitems_for_this_value <= $partition_size) { 
     960        # add all the current values into the temporary list 
     961        $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 
     962        $items_in_partition += $numitems_for_this_value; 
     963    } elsif ($items_in_partition < $partition_size) { 
     964        # only want to add some of the values into temporary list 
     965        # note, we only get here if bookshelf type is never 
     966        my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}; 
     967        @OIDs = $self->sort_leaf_items(\@OIDs); 
     968        my $num_items_needed = $partition_size - $items_in_partition; 
     969        my @slice = splice(@OIDs, 0, $num_items_needed); 
     970        $metadata_value_to_OIDs_subhash{$metadata_value} = \@slice; 
     971 
     972        # now we have filled up the partition 
    971973        my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric); 
    972974        my $partitionname = $partitionstart; 
     
    977979        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 
    978980        %metadata_value_to_OIDs_subhash = (); 
     981        $items_in_partition = 0; 
    979982        $lastpartitionend = $partitionend; 
    980     } 
    981     } 
    982  
     983 
     984        # can we get more partitions from this metadata value? 
     985        while (scalar(@OIDs) >= $partition_size) { 
     986        my @slice = splice(@OIDs, 0, $partition_size); 
     987        $metadata_value_to_OIDs_subhash{$metadata_value} = \@slice; 
     988        $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric); 
     989        my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric); 
     990        my $partitionname = $partitionstart; 
     991        if ($partitionend ne $partitionstart) { 
     992            $partitionname = $partitionname . "-" . $partitionend; 
     993        } 
     994        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 
     995        %metadata_value_to_OIDs_subhash = (); 
     996        $items_in_partition = 0; 
     997        $lastpartitionend = $partitionend; 
     998 
     999        } 
     1000        if (scalar(@OIDs) > 0) { 
     1001        $metadata_value_to_OIDs_subhash{$metadata_value} = \@OIDs; 
     1002        $items_in_partition = scalar(@OIDs); 
     1003        $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric); 
     1004        } 
     1005         
     1006         
     1007    } 
     1008 
     1009    if ($items_in_partition == $partition_size) { 
     1010        # its the end of a partition 
     1011        my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric); 
     1012        my $partitionname = $partitionstart; 
     1013        if ($partitionend ne $partitionstart) { 
     1014        $partitionname = $partitionname . "-" . $partitionend; 
     1015        } 
     1016 
     1017        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 
     1018        %metadata_value_to_OIDs_subhash = (); 
     1019        $items_in_partition = 0; 
     1020        $lastpartitionend = $partitionend; 
     1021    } 
     1022    } # foreach metadata value 
     1023 
     1024    if ($items_in_partition > 0) { 
     1025    # we have to add the last partition 
     1026    my $partitionend = $self->generate_partition_end(@sortedmetadata_values[@sortedmetadata_values-1], $partitionstart, $partition_name_length, $is_numeric); 
     1027    my $partitionname = $partitionstart; 
     1028    if ($partitionend ne $partitionstart) { 
     1029        $partitionname = $partitionname . "-" . $partitionend; 
     1030    } 
     1031     
     1032    $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 
     1033    } 
     1034     
    9831035    # The partitions are stored in an HList 
    9841036    $classifier_node->{'childtype'} = "HList"; 
     
    11341186    my $classifier_node = shift(@_); 
    11351187    my $metadata_value_to_OIDs_hash_ref = shift(@_); 
    1136  
    11371188    my $metadata_group = shift(@metadata_groups); 
    11381189    $classifier_node->{'mdtype'} = $metadata_group; 
     
    11431194    { 
    11441195    my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}; 
     1196 
    11451197    # If there is only one item and 'bookshelf_type' is not always (ie. never or duplicate_only), add the item to the list 
    11461198    if (@OIDs == 1 && $self->{$metadata_group . ".bookshelf_type"} ne "always") { 
     
    11511203    # If 'bookshelf_type' is 'never', list all the items even if there are duplicated values 
    11521204    elsif ($self->{$metadata_group . ".bookshelf_type"} eq "never") { 
     1205         
    11531206        @OIDs = $self->sort_leaf_items(\@OIDs); 
    11541207        foreach my $OID (@OIDs) { 
     
    11691222        if (@metadata_groups > 0) { 
    11701223        my $next_metadata_group = $metadata_groups[0];       
    1171         $child_classifier_node{'childtype'} = $self->{$next_metadata_group . ".list_type"}; 
    1172  
    1173         # separate metadata into those that below in the next/sub-metadata_group 
    1174         # and those that below at the current level's metadata_group 
     1224         
     1225        # separate metadata into those that belong in the next/sub-metadata_group 
     1226        # and those that belong at the current level's metadata_group 
    11751227 
    11761228        my $OID_to_metadata_values_hash_ref = $self->{$next_metadata_group . ".list"}; 
     
    12181270    my $OID = shift(@_); 
    12191271    my $metadata_value = shift(@_); 
    1220      
     1272 
    12211273    my $OID_to_metadata_values_hash_ref = $self->{$metadata_group . ".list"}; 
    12221274    my @metadata_values = @{$OID_to_metadata_values_hash_ref->{$OID}};