Changeset 33463


Ignore:
Timestamp:
2019-09-12T14:17:11+12:00 (5 years ago)
Author:
kjdon
Message:

Fixed up some typos. Removed the use_hlist_for option: it is very hard to understand, and it actually only works if partition_type is none and bookshelf_type is always, in which case it puts the bookshelves into an hlist instead of a vlist. Instead, I have added the all_values partition type, which just makes each value become its own partition. This might be useful for dates, or for other short classification ids.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/List.pm

    r33460 r33463  
    314314    # Default: duplicate_only, ie. leave leaf nodes ungrouped (equivalent to AZCompactList -mingroup 2)
    315315    $self->{$last_metadata_group . ".bookshelf_type"} = $self->{'bookshelf_type'};
    316        
    317     # Whether to use an hlist or a vlist for each level in the hierarchy (default: vlist)
    318     foreach my $metadata_group (@metadata_groups) {
    319     $self->{$metadata_group . ".list_type"} = "VList";
    320     }
    321     foreach my $metadata_group (split(/\,/, $self->{'use_hlist_for'})) {
    322     $self->{$metadata_group . ".list_type"} = "HList";
    323     }
    324316
    325317    # How the items are grouped into partitions (default: by letter)
    326318    # for each level (metadata group), separated by '/'
    327 
    328319    $self->set_metadata_groups_info_per_level("partition_type_within_level", $partition_type_default, $valid_partition_types);
    329 
    330320    $self->set_metadata_groups_info_per_level("numeric_partition_type_within_level", $numeric_partition_type_default, $valid_numeric_partition_types);
    331321
     
    425415    # A hash for all the doc ids that we have seen, so we don't classify something twice
    426416    $self->{'all_doc_OIDs'} = {};
    427     #return bless $self, $class;
    428417    return $self;
    429418}
     
    443432
    444433    if (!defined $self->{$info_name}) {
    445     print STDERR "**** undefined $info_name\n";
     434    print STDERR "List Error: no values were set for option $info_name\n";
    446435    }
    447436    my @info_list = split(/\//, $self->{$info_name});
     
    581570            my $lc_metadata_value = lc($metadata_value);
    582571            $lc_metadata_value  = &sorttools::format_metadata_for_sorting($real_metadata_element, $lc_metadata_value, $doc_obj) unless $self->{'no_metadata_formatting'};
    583             #print STDERR "formatted value = $lc_metadata_value\n";
    584572   
    585573            # Add the metadata value into the list for this combination of metadata group
     
    623611    # Create the root node of the classification hierarchy
    624612    my %classifier_node = ( 'thistype' => "Invisible",
    625                 'childtype' => $self->{$first_metadata_group . ".list_type"},
     613                #'childtype' => $self->{$first_metadata_group . ".list_type"},
    626614                'Title' => $self->{'buttonname'},
    627615                'contains' => [],
     
    695683    }
    696684    elsif ($partition_type_within_level =~ /^constant_size$/i && scalar(keys %metadata_value_to_OIDs_hash) > $partition_size_within_level) {
    697         $self->split_constant_size(\@metadata_groups, $classifier_node, $partition_size_within_level, $partition_sort_mode_within_level, \%metadata_value_to_OIDs_hash, $self->{'partition_name_length'});
     685        $self->split_constant_size(\@metadata_groups, $classifier_node, $partition_size_within_level, $partition_sort_mode_within_level, $bookshelf_type_within_level, \%metadata_value_to_OIDs_hash, $self->{'partition_name_length'});
    698686    }
    699687       
     
    743731    # Generate hlists of a certain size
    744732   
    745     $self->split_constant_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1);
     733    $self->split_constant_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $bookshelf_type_within_level, $numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1);
    746734    } elsif ($numeric_partition_type_within_level eq "approximate_size" && scalar(keys %$numeric_metadata_value_to_OIDs_hash_ref) > $numeric_partition_size_within_level) {
    747735    $self->split_approximate_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $bookshelf_type_within_level, $numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1);
     
    818806    for (my $i = 0; $i < scalar(@partition_buckets) - 1; $i++) {
    819807   
    820     my $this_bucket = $partition_buckets[$i]; #->{'name'};
    821     my $next_bucket = $partition_buckets[$i+1]; #->{'name'};
     808    my $this_bucket = $partition_buckets[$i];
     809    my $next_bucket = $partition_buckets[$i+1];
    822810           
    823811    my $items_in_partition = $this_bucket->{'size'};               
     
    848836    my $last_partition_end = "";
    849837    my $partition_start = "";
     838    my $partition_end = "";
     839    my $partition_name = "";
    850840    foreach my $partition (@new_partition_buckets) {
    851841    my @metadata_values = $self->sort_metadata_values_array($sort_mode, @{$partition->{'metadata_values'}});
     
    853843    $partition_start = $self->generate_partition_start($metadata_values[0], $last_partition_end, $partition_name_length, $is_numeric);
    854844   
    855     if ($items_in_partition > $partition_size) {
     845    if ($items_in_partition <= $partition_size) {
     846        # we can just add the partition as is
     847        my %metadata_values_to_OIDs_subhashes = ();
     848        for (my $i = 0; $i < scalar(@metadata_values); $i++) {
     849        my $metadata_value = $metadata_values[$i];
     850        $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};         
     851        }
     852        my $last_metadata_value = $metadata_values[scalar(@metadata_values)-1];
     853        $partition_end = $self->generate_partition_end($last_metadata_value, $partition_start, $partition_name_length, $is_numeric);
     854        $partition_name = $partition_start;
     855        if ($partition_end ne $partition_start) {
     856        $partition_name = $partition_name . "-" . $partition_end;
     857        }
     858        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes);         
     859        $last_partition_end = $partition_end;       
     860    } else {
     861        # we have too many items, need to split the partition
    856862        my $items_done = 0;
    857863        my %metadata_values_to_OIDs_subhashes = ();
     
    861867        my $items_for_this_md_value = $bookshelf_type eq "never" ? scalar(@{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}) : 1;
    862868       
    863         my $partitionend = $self->generate_partition_end($metadata_value, $partition_start, $partition_name_length, $is_numeric);
    864         my $partitionname = $partition_start;
    865         if ($partitionend ne $partition_start) {
    866             $partitionname = $partitionname . "-" . $partitionend;
    867         }
    868        
    869869        if ($items_done + $items_for_this_md_value > $partition_size && $items_done != 0) {
    870             # Start a new partition
    871             $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_values_to_OIDs_subhashes);
    872             $last_partition_end = $partitionend;           
     870            # Save the stored items into a partition
     871            $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes);
     872            $last_partition_end = $partition_end;           
    873873            $partition_start = $self->generate_partition_start($metadata_value, $last_partition_end, $partition_name_length, $is_numeric);
    874874            $items_done = 0;
     
    878878        # If bookshelf_type is "never" and the current metadata value holds too many items, need to split into several partitions
    879879        if ($bookshelf_type eq "never" && $items_for_this_md_value > $partition_size) {
    880            
     880
    881881            my $partitionname_for_this_value = $self->generate_partition_start($metadata_value, $last_partition_end, $partition_name_length, $is_numeric);
    882882            # Get the number of partitions needed for this value
    883883            my $num_splits = int($items_for_this_md_value / $partition_size);
    884884            $num_splits++ if ($items_for_this_md_value / $partition_size > $num_splits);
    885            
    886             my @OIDs_for_this_value = $metadata_value_to_OIDs_hash_ref->{$metadata_value};
     885            my @OIDs_for_this_value = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}};
    887886            for (my $i = 0; $i < $num_splits; $i++) {
    888887            my %OIDs_subhashes_for_this_value = ();
     
    894893            # The last bucket might have only a few items and need to be merged with buckets for subsequent metadata values
    895894            if ($i == $num_splits - 1 && scalar(@OIDs_for_this_partition) < $partition_size) {
     895                $partition_start = $partitionname_for_this_value;
     896                $partition_name = $partition_start;
    896897                $metadata_values_to_OIDs_subhashes{$metadata_value} = \@OIDs_for_this_partition;
    897898                $items_done += scalar(@OIDs_for_this_partition);
    898                 next;
     899                $last_partition_end = $partitionname_for_this_value
     900            } else {
     901           
     902                # Add an HList for this bucket
     903                $OIDs_subhashes_for_this_value{$metadata_value} = \@OIDs_for_this_partition;
     904                $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname_for_this_value, \%OIDs_subhashes_for_this_value);
     905                $last_partition_end = $partitionname_for_this_value;
    899906            }
    900            
    901             # Add an HList for this bucket
    902             $OIDs_subhashes_for_this_value{$metadata_value} = \@OIDs_for_this_partition;
    903             $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname_for_this_value, \%OIDs_subhashes_for_this_value);
    904             $last_partition_end = $partitionname_for_this_value;
    905907            }
    906             next;
     908        } else {
     909                   
     910            $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};
     911            $items_done += $bookshelf_type eq "never" ? scalar(@{$metadata_values_to_OIDs_subhashes{$metadata_value}}) : 1;
     912             $partition_end = $self->generate_partition_end($metadata_value, $partition_start, $partition_name_length, $is_numeric);
     913             $partition_name = $partition_start;
     914            if ($partition_end ne $partition_start) {
     915            $partition_name = $partition_name . "-" . $partition_end;
     916            }
     917           
    907918        }
    908                        
    909         $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};
    910         $items_done += $bookshelf_type eq "never" ? scalar(@{$metadata_values_to_OIDs_subhashes{$metadata_value}}) : 1;         
    911919       
    912920        # The last partition
    913         if($i == scalar(@metadata_values) - 1) {
    914             $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_values_to_OIDs_subhashes);
     921        if($i == scalar(@metadata_values) - 1 && $items_done >0) {
     922            $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes);
    915923        }
    916         }       
     924       
     925        }
    917926    } # end if items in partition > partition size
    918     else {
    919         # The easier case, just add a partition
    920         my %metadata_values_to_OIDs_subhashes = ();
    921         for (my $i = 0; $i < scalar(@metadata_values); $i++) {
    922         my $metadata_value = $metadata_values[$i];
    923         $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};         
    924         }
    925         my $last_metadata_value = $metadata_values[scalar(@metadata_values)-1];
    926         my $partitionend = $self->generate_partition_end($last_metadata_value, $partition_start, $partition_name_length, $is_numeric);
    927         my $partitionname = $partition_start;
    928         if ($partitionend ne $partition_start) {
    929         $partitionname = $partitionname . "-" . $partitionend;
    930         }
    931    
    932         $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_values_to_OIDs_subhashes);           
    933         $last_partition_end = $partitionend;       
    934     }
     927
    935928    }   
    936929   
     
    947940    my $partition_size = shift(@_);
    948941    my $sort_mode = shift(@_);
     942    my $bookshelf_type = shift(@_);
    949943    my $metadata_value_to_OIDs_hash_ref = shift(@_);
    950944    my $partition_name_length = shift(@_);
     
    952946
    953947    my @sortedmetadata_values = $self->sort_metadata_values_array($sort_mode, keys(%$metadata_value_to_OIDs_hash_ref));
    954     my $itemsdone = 0;
    955     my $numitems = scalar(@sortedmetadata_values);
     948    my $items_in_partition = 0;
    956949    my %metadata_value_to_OIDs_subhash = ();
    957950    my $lastpartitionend = "";
    958951    my $partitionstart;
     952   
    959953    foreach my $metadata_value (@sortedmetadata_values) {
    960     $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};
    961     $itemsdone++;
    962     my $itemsinpartition = scalar(keys %metadata_value_to_OIDs_subhash);
    963 
    964     # Is this the start of a new partition?
    965     if ($itemsinpartition == 1) {
     954    if ($items_in_partition == 0) {
     955        # a new partition, set the name
    966956        $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric);
    967957    }
    968 
    969     # Is this the end of the partition?
    970     if ($itemsinpartition == $partition_size || $itemsdone == $numitems) {
     958    my $numitems_for_this_value = ($bookshelf_type eq "never" ? scalar(@{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}) : 1);
     959    if ($items_in_partition + $numitems_for_this_value <= $partition_size) {
     960        # add all the current values into the temporary list
     961        $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};
     962        $items_in_partition += $numitems_for_this_value;
     963    } elsif ($items_in_partition < $partition_size) {
     964        # only want to add some of the values into temporary list
     965        # note, we only get here if bookshelf type is never
     966        my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}};
     967        @OIDs = $self->sort_leaf_items(\@OIDs);
     968        my $num_items_needed = $partition_size - $items_in_partition;
     969        my @slice = splice(@OIDs, 0, $num_items_needed);
     970        $metadata_value_to_OIDs_subhash{$metadata_value} = \@slice;
     971
     972        # now we have filled up the partition
    971973        my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric);
    972974        my $partitionname = $partitionstart;
     
    977979        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash);
    978980        %metadata_value_to_OIDs_subhash = ();
     981        $items_in_partition = 0;
    979982        $lastpartitionend = $partitionend;
    980     }
    981     }
    982 
     983
     984        # can we get more partitions from this metadata value?
     985        while (scalar(@OIDs) >= $partition_size) {
     986        my @slice = splice(@OIDs, 0, $partition_size);
     987        $metadata_value_to_OIDs_subhash{$metadata_value} = \@slice;
     988        $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric);
     989        my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric);
     990        my $partitionname = $partitionstart;
     991        if ($partitionend ne $partitionstart) {
     992            $partitionname = $partitionname . "-" . $partitionend;
     993        }
     994        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash);
     995        %metadata_value_to_OIDs_subhash = ();
     996        $items_in_partition = 0;
     997        $lastpartitionend = $partitionend;
     998
     999        }
     1000        if (scalar(@OIDs) > 0) {
     1001        $metadata_value_to_OIDs_subhash{$metadata_value} = \@OIDs;
     1002        $items_in_partition = scalar(@OIDs);
     1003        $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric);
     1004        }
     1005       
     1006       
     1007    }
     1008
     1009    if ($items_in_partition == $partition_size) {
     1010        # its the end of a partition
     1011        my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric);
     1012        my $partitionname = $partitionstart;
     1013        if ($partitionend ne $partitionstart) {
     1014        $partitionname = $partitionname . "-" . $partitionend;
     1015        }
     1016
     1017        $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash);
     1018        %metadata_value_to_OIDs_subhash = ();
     1019        $items_in_partition = 0;
     1020        $lastpartitionend = $partitionend;
     1021    }
     1022    } # foreach metadata value
     1023
     1024    if ($items_in_partition > 0) {
     1025    # we have to add the last partition
     1026    my $partitionend = $self->generate_partition_end(@sortedmetadata_values[@sortedmetadata_values-1], $partitionstart, $partition_name_length, $is_numeric);
     1027    my $partitionname = $partitionstart;
     1028    if ($partitionend ne $partitionstart) {
     1029        $partitionname = $partitionname . "-" . $partitionend;
     1030    }
     1031   
     1032    $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash);
     1033    }
     1034   
    9831035    # The partitions are stored in an HList
    9841036    $classifier_node->{'childtype'} = "HList";
     
    11341186    my $classifier_node = shift(@_);
    11351187    my $metadata_value_to_OIDs_hash_ref = shift(@_);
    1136 
    11371188    my $metadata_group = shift(@metadata_groups);
    11381189    $classifier_node->{'mdtype'} = $metadata_group;
     
    11431194    {
    11441195    my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}};
     1196
    11451197    # If there is only one item and 'bookshelf_type' is not always (ie. never or duplicate_only), add the item to the list
    11461198    if (@OIDs == 1 && $self->{$metadata_group . ".bookshelf_type"} ne "always") {
     
    11511203    # If 'bookshelf_type' is 'never', list all the items even if there are duplicated values
    11521204    elsif ($self->{$metadata_group . ".bookshelf_type"} eq "never") {
     1205       
    11531206        @OIDs = $self->sort_leaf_items(\@OIDs);
    11541207        foreach my $OID (@OIDs) {
     
    11691222        if (@metadata_groups > 0) {
    11701223        my $next_metadata_group = $metadata_groups[0];     
    1171         $child_classifier_node{'childtype'} = $self->{$next_metadata_group . ".list_type"};
    1172 
    1173         # separate metadata into those that below in the next/sub-metadata_group
    1174         # and those that below at the current level's metadata_group
     1224       
     1225        # separate metadata into those that belong in the next/sub-metadata_group
     1226        # and those that belong at the current level's metadata_group
    11751227
    11761228        my $OID_to_metadata_values_hash_ref = $self->{$next_metadata_group . ".list"};
     
    12181270    my $OID = shift(@_);
    12191271    my $metadata_value = shift(@_);
    1220    
     1272
    12211273    my $OID_to_metadata_values_hash_ref = $self->{$metadata_group . ".list"};
    12221274    my @metadata_values = @{$OID_to_metadata_values_hash_ref->{$OID}};
Note: See TracChangeset for help on using the changeset viewer.