Changeset 33463 for main/trunk
- Timestamp:
- 2019-09-12T14:17:11+12:00 (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/classify/List.pm
r33460 r33463 314 314 # Default: duplicate_only, ie. leave leaf nodes ungrouped (equivalent to AZCompactList -mingroup 2) 315 315 $self->{$last_metadata_group . ".bookshelf_type"} = $self->{'bookshelf_type'}; 316 317 # Whether to use an hlist or a vlist for each level in the hierarchy (default: vlist)318 foreach my $metadata_group (@metadata_groups) {319 $self->{$metadata_group . ".list_type"} = "VList";320 }321 foreach my $metadata_group (split(/\,/, $self->{'use_hlist_for'})) {322 $self->{$metadata_group . ".list_type"} = "HList";323 }324 316 325 317 # How the items are grouped into partitions (default: by letter) 326 318 # for each level (metadata group), separated by '/' 327 328 319 $self->set_metadata_groups_info_per_level("partition_type_within_level", $partition_type_default, $valid_partition_types); 329 330 320 $self->set_metadata_groups_info_per_level("numeric_partition_type_within_level", $numeric_partition_type_default, $valid_numeric_partition_types); 331 321 … … 425 415 # A hash for all the doc ids that we have seen, so we don't classify something twice 426 416 $self->{'all_doc_OIDs'} = {}; 427 #return bless $self, $class;428 417 return $self; 429 418 } … … 443 432 444 433 if (!defined $self->{$info_name}) { 445 print STDERR " **** undefined$info_name\n";434 print STDERR "List Error: no values were set for option $info_name\n"; 446 435 } 447 436 my @info_list = split(/\//, $self->{$info_name}); … … 581 570 my $lc_metadata_value = lc($metadata_value); 582 571 $lc_metadata_value = &sorttools::format_metadata_for_sorting($real_metadata_element, $lc_metadata_value, $doc_obj) unless $self->{'no_metadata_formatting'}; 583 #print STDERR "formatted value = $lc_metadata_value\n";584 572 585 573 # Add the metadata value into the list for this combination of metadata group … … 623 611 # Create the root node of the classification hierarchy 624 612 my %classifier_node = ( 'thistype' => "Invisible", 625 'childtype' => $self->{$first_metadata_group . ".list_type"},613 #'childtype' => $self->{$first_metadata_group . ".list_type"}, 626 614 'Title' => $self->{'buttonname'}, 627 615 'contains' => [], … … 695 683 } 696 684 elsif ($partition_type_within_level =~ /^constant_size$/i && scalar(keys %metadata_value_to_OIDs_hash) > $partition_size_within_level) { 697 $self->split_constant_size(\@metadata_groups, $classifier_node, $partition_size_within_level, $partition_sort_mode_within_level, \%metadata_value_to_OIDs_hash, $self->{'partition_name_length'});685 $self->split_constant_size(\@metadata_groups, $classifier_node, $partition_size_within_level, $partition_sort_mode_within_level, $bookshelf_type_within_level, \%metadata_value_to_OIDs_hash, $self->{'partition_name_length'}); 698 686 } 699 687 … … 743 731 # Generate hlists of a certain size 744 732 745 $self->split_constant_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $ numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1);733 $self->split_constant_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $bookshelf_type_within_level, $numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1); 746 734 } elsif ($numeric_partition_type_within_level eq "approximate_size" && scalar(keys %$numeric_metadata_value_to_OIDs_hash_ref) > $numeric_partition_size_within_level) { 747 735 $self->split_approximate_size(\@metadata_groups, $classifier_node, $numeric_partition_size_within_level, $numeric_partition_sort_mode_within_level, $bookshelf_type_within_level, $numeric_metadata_value_to_OIDs_hash_ref, $numeric_partition_name_length_within_level, 1); … … 818 806 for (my $i = 0; $i < scalar(@partition_buckets) - 1; $i++) { 819 807 820 my $this_bucket = $partition_buckets[$i]; #->{'name'};821 my $next_bucket = $partition_buckets[$i+1]; #->{'name'};808 my $this_bucket = $partition_buckets[$i]; 809 my $next_bucket = $partition_buckets[$i+1]; 822 810 823 811 my $items_in_partition = $this_bucket->{'size'}; … … 848 836 my $last_partition_end = ""; 849 837 my $partition_start = ""; 838 my $partition_end = ""; 839 my $partition_name = ""; 850 840 foreach my $partition (@new_partition_buckets) { 851 841 my @metadata_values = $self->sort_metadata_values_array($sort_mode, @{$partition->{'metadata_values'}}); … … 853 843 $partition_start = $self->generate_partition_start($metadata_values[0], $last_partition_end, $partition_name_length, $is_numeric); 854 844 855 if ($items_in_partition > $partition_size) { 845 if ($items_in_partition <= $partition_size) { 846 # we can just add the partition as is 847 my %metadata_values_to_OIDs_subhashes = (); 848 for (my $i = 0; $i < scalar(@metadata_values); $i++) { 849 my $metadata_value = $metadata_values[$i]; 850 $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 851 } 852 my $last_metadata_value = $metadata_values[scalar(@metadata_values)-1]; 853 $partition_end = $self->generate_partition_end($last_metadata_value, $partition_start, $partition_name_length, $is_numeric); 854 $partition_name = $partition_start; 855 if ($partition_end ne $partition_start) { 856 $partition_name = $partition_name . "-" . $partition_end; 857 } 858 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes); 859 $last_partition_end = $partition_end; 860 } else { 861 # we have too many items, need to split the partition 856 862 my $items_done = 0; 857 863 my %metadata_values_to_OIDs_subhashes = (); … … 861 867 my $items_for_this_md_value = $bookshelf_type eq "never" ? scalar(@{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}) : 1; 862 868 863 my $partitionend = $self->generate_partition_end($metadata_value, $partition_start, $partition_name_length, $is_numeric);864 my $partitionname = $partition_start;865 if ($partitionend ne $partition_start) {866 $partitionname = $partitionname . "-" . $partitionend;867 }868 869 869 if ($items_done + $items_for_this_md_value > $partition_size && $items_done != 0) { 870 # S tart a newpartition871 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition name, \%metadata_values_to_OIDs_subhashes);872 $last_partition_end = $partition end;870 # Save the stored items into a partition 871 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes); 872 $last_partition_end = $partition_end; 873 873 $partition_start = $self->generate_partition_start($metadata_value, $last_partition_end, $partition_name_length, $is_numeric); 874 874 $items_done = 0; … … 878 878 # If bookshelf_type is "never" and the current metadata value holds too many items, need to split into several partitions 879 879 if ($bookshelf_type eq "never" && $items_for_this_md_value > $partition_size) { 880 880 881 881 my $partitionname_for_this_value = $self->generate_partition_start($metadata_value, $last_partition_end, $partition_name_length, $is_numeric); 882 882 # Get the number of partitions needed for this value 883 883 my $num_splits = int($items_for_this_md_value / $partition_size); 884 884 $num_splits++ if ($items_for_this_md_value / $partition_size > $num_splits); 885 886 my @OIDs_for_this_value = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 885 my @OIDs_for_this_value = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}; 887 886 for (my $i = 0; $i < $num_splits; $i++) { 888 887 my %OIDs_subhashes_for_this_value = (); … … 894 893 # The last bucket might have only a few items and need to be merged with buckets for subsequent metadata values 895 894 if ($i == $num_splits - 1 && scalar(@OIDs_for_this_partition) < $partition_size) { 895 $partition_start = $partitionname_for_this_value; 896 $partition_name = $partition_start; 896 897 $metadata_values_to_OIDs_subhashes{$metadata_value} = \@OIDs_for_this_partition; 897 898 $items_done += scalar(@OIDs_for_this_partition); 898 next; 899 $last_partition_end = $partitionname_for_this_value 900 } else { 901 902 # Add an HList for this bucket 903 $OIDs_subhashes_for_this_value{$metadata_value} = \@OIDs_for_this_partition; 904 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname_for_this_value, \%OIDs_subhashes_for_this_value); 905 $last_partition_end = $partitionname_for_this_value; 899 906 } 900 901 # Add an HList for this bucket902 $OIDs_subhashes_for_this_value{$metadata_value} = \@OIDs_for_this_partition;903 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname_for_this_value, \%OIDs_subhashes_for_this_value);904 $last_partition_end = $partitionname_for_this_value;905 907 } 906 next; 908 } else { 909 910 $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 911 $items_done += $bookshelf_type eq "never" ? scalar(@{$metadata_values_to_OIDs_subhashes{$metadata_value}}) : 1; 912 $partition_end = $self->generate_partition_end($metadata_value, $partition_start, $partition_name_length, $is_numeric); 913 $partition_name = $partition_start; 914 if ($partition_end ne $partition_start) { 915 $partition_name = $partition_name . "-" . $partition_end; 916 } 917 907 918 } 908 909 $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value};910 $items_done += $bookshelf_type eq "never" ? scalar(@{$metadata_values_to_OIDs_subhashes{$metadata_value}}) : 1;911 919 912 920 # The last partition 913 if($i == scalar(@metadata_values) - 1 ) {914 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition name, \%metadata_values_to_OIDs_subhashes);921 if($i == scalar(@metadata_values) - 1 && $items_done >0) { 922 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partition_name, \%metadata_values_to_OIDs_subhashes); 915 923 } 916 } 924 925 } 917 926 } # end if items in partition > partition size 918 else { 919 # The easier case, just add a partition 920 my %metadata_values_to_OIDs_subhashes = (); 921 for (my $i = 0; $i < scalar(@metadata_values); $i++) { 922 my $metadata_value = $metadata_values[$i]; 923 $metadata_values_to_OIDs_subhashes{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 924 } 925 my $last_metadata_value = $metadata_values[scalar(@metadata_values)-1]; 926 my $partitionend = $self->generate_partition_end($last_metadata_value, $partition_start, $partition_name_length, $is_numeric); 927 my $partitionname = $partition_start; 928 if ($partitionend ne $partition_start) { 929 $partitionname = $partitionname . "-" . $partitionend; 930 } 931 932 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_values_to_OIDs_subhashes); 933 $last_partition_end = $partitionend; 934 } 927 935 928 } 936 929 … … 947 940 my $partition_size = shift(@_); 948 941 my $sort_mode = shift(@_); 942 my $bookshelf_type = shift(@_); 949 943 my $metadata_value_to_OIDs_hash_ref = shift(@_); 950 944 my $partition_name_length = shift(@_); … … 952 946 953 947 my @sortedmetadata_values = $self->sort_metadata_values_array($sort_mode, keys(%$metadata_value_to_OIDs_hash_ref)); 954 my $itemsdone = 0; 955 my $numitems = scalar(@sortedmetadata_values); 948 my $items_in_partition = 0; 956 949 my %metadata_value_to_OIDs_subhash = (); 957 950 my $lastpartitionend = ""; 958 951 my $partitionstart; 952 959 953 foreach my $metadata_value (@sortedmetadata_values) { 960 $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 961 $itemsdone++; 962 my $itemsinpartition = scalar(keys %metadata_value_to_OIDs_subhash); 963 964 # Is this the start of a new partition? 965 if ($itemsinpartition == 1) { 954 if ($items_in_partition == 0) { 955 # a new partition, set the name 966 956 $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric); 967 957 } 968 969 # Is this the end of the partition? 970 if ($itemsinpartition == $partition_size || $itemsdone == $numitems) { 958 my $numitems_for_this_value = ($bookshelf_type eq "never" ? scalar(@{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}) : 1); 959 if ($items_in_partition + $numitems_for_this_value <= $partition_size) { 960 # add all the current values into the temporary list 961 $metadata_value_to_OIDs_subhash{$metadata_value} = $metadata_value_to_OIDs_hash_ref->{$metadata_value}; 962 $items_in_partition += $numitems_for_this_value; 963 } elsif ($items_in_partition < $partition_size) { 964 # only want to add some of the values into temporary list 965 # note, we only get here if bookshelf type is never 966 my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}; 967 @OIDs = $self->sort_leaf_items(\@OIDs); 968 my $num_items_needed = $partition_size - $items_in_partition; 969 my @slice = splice(@OIDs, 0, $num_items_needed); 970 $metadata_value_to_OIDs_subhash{$metadata_value} = \@slice; 971 972 # now we have filled up the partition 971 973 my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric); 972 974 my $partitionname = $partitionstart; … … 977 979 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 978 980 %metadata_value_to_OIDs_subhash = (); 981 $items_in_partition = 0; 979 982 $lastpartitionend = $partitionend; 980 } 981 } 982 983 984 # can we get more partitions from this metadata value? 985 while (scalar(@OIDs) >= $partition_size) { 986 my @slice = splice(@OIDs, 0, $partition_size); 987 $metadata_value_to_OIDs_subhash{$metadata_value} = \@slice; 988 $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric); 989 my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric); 990 my $partitionname = $partitionstart; 991 if ($partitionend ne $partitionstart) { 992 $partitionname = $partitionname . "-" . $partitionend; 993 } 994 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 995 %metadata_value_to_OIDs_subhash = (); 996 $items_in_partition = 0; 997 $lastpartitionend = $partitionend; 998 999 } 1000 if (scalar(@OIDs) > 0) { 1001 $metadata_value_to_OIDs_subhash{$metadata_value} = \@OIDs; 1002 $items_in_partition = scalar(@OIDs); 1003 $partitionstart = $self->generate_partition_start($metadata_value, $lastpartitionend, $partition_name_length, $is_numeric); 1004 } 1005 1006 1007 } 1008 1009 if ($items_in_partition == $partition_size) { 1010 # its the end of a partition 1011 my $partitionend = $self->generate_partition_end($metadata_value, $partitionstart, $partition_name_length, $is_numeric); 1012 my $partitionname = $partitionstart; 1013 if ($partitionend ne $partitionstart) { 1014 $partitionname = $partitionname . "-" . $partitionend; 1015 } 1016 1017 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 1018 %metadata_value_to_OIDs_subhash = (); 1019 $items_in_partition = 0; 1020 $lastpartitionend = $partitionend; 1021 } 1022 } # foreach metadata value 1023 1024 if ($items_in_partition > 0) { 1025 # we have to add the last partition 1026 my $partitionend = $self->generate_partition_end(@sortedmetadata_values[@sortedmetadata_values-1], $partitionstart, $partition_name_length, $is_numeric); 1027 my $partitionname = $partitionstart; 1028 if ($partitionend ne $partitionstart) { 1029 $partitionname = $partitionname . "-" . $partitionend; 1030 } 1031 1032 $self->add_hlist_partition(\@metadata_groups, $classifier_node, $partitionname, \%metadata_value_to_OIDs_subhash); 1033 } 1034 983 1035 # The partitions are stored in an HList 984 1036 $classifier_node->{'childtype'} = "HList"; … … 1134 1186 my $classifier_node = shift(@_); 1135 1187 my $metadata_value_to_OIDs_hash_ref = shift(@_); 1136 1137 1188 my $metadata_group = shift(@metadata_groups); 1138 1189 $classifier_node->{'mdtype'} = $metadata_group; … … 1143 1194 { 1144 1195 my @OIDs = @{$metadata_value_to_OIDs_hash_ref->{$metadata_value}}; 1196 1145 1197 # If there is only one item and 'bookshelf_type' is not always (ie. never or duplicate_only), add the item to the list 1146 1198 if (@OIDs == 1 && $self->{$metadata_group . ".bookshelf_type"} ne "always") { … … 1151 1203 # If 'bookshelf_type' is 'never', list all the items even if there are duplicated values 1152 1204 elsif ($self->{$metadata_group . ".bookshelf_type"} eq "never") { 1205 1153 1206 @OIDs = $self->sort_leaf_items(\@OIDs); 1154 1207 foreach my $OID (@OIDs) { … … 1169 1222 if (@metadata_groups > 0) { 1170 1223 my $next_metadata_group = $metadata_groups[0]; 1171 $child_classifier_node{'childtype'} = $self->{$next_metadata_group . ".list_type"}; 1172 1173 # separate metadata into those that below in the next/sub-metadata_group 1174 # and those that below at the current level's metadata_group 1224 1225 # separate metadata into those that belong in the next/sub-metadata_group 1226 # and those that belong at the current level's metadata_group 1175 1227 1176 1228 my $OID_to_metadata_values_hash_ref = $self->{$next_metadata_group . ".list"}; … … 1218 1270 my $OID = shift(@_); 1219 1271 my $metadata_value = shift(@_); 1220 1272 1221 1273 my $OID_to_metadata_values_hash_ref = $self->{$metadata_group . ".list"}; 1222 1274 my @metadata_values = @{$OID_to_metadata_values_hash_ref->{$OID}};
Note:
See TracChangeset
for help on using the changeset viewer.