source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 3303

Last change on this file since 3303 was 3303, checked in by davidb, 22 years ago

Classifier extented to support frequency sort option through -freqsort

Instead of sorting bookshelf nodes alph-numerically, this option
uses the number of books contained at the node for ordering.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.4 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28#
29# metadata=Metaname -- all documents with Metaname metadata
30# will be included in list, list will be sorted
31# by this element.
32# buttonname=Title -- (optional) the title field for this classification.
33# if not included title field will be Metaname.
34# mingroup=Num -- (optional) the smallest value that will cause
35# a group in the hierarchy to form.
36# minnesting=Num -- (optional) the smallest value that will cause a
37# list to converted into nested list
38# mincompact=Num -- (optional) used in compact list
39# maxcompact=Num -- (optional) used in compact list
40# doclevel=top|section -- (optional) level to process document at.
41# onlyfirst -- (optional) control whether all or only first
42# metadata value used from array of metadata
43package AZCompactList;
44
45use BasClas;
46use sorttools;
47
48sub BEGIN {
49 @ISA = ('BasClas');
50}
51
52sub print_usage {
53 print STDERR "
54 usage: classify AZCompactList -metadata X [options]
55 options:
56 -metadata X (required) Metadata field used for classification
57 -buttonname X Title to use on web pages (defaults to metadata)
58 -removeprefix regex pattern to remove from metadata before sorting
59 -doclevel top|section (Defaults to top)
60 -freqsort Sort by node frequency rather than alpha-numeric
61 -mingroup N Minimum num of documents required to form a new group
62 -minnesting N Minimum list size to become a nested list
63 -mincompact N Used in compact list
64 -maxcompact N Used in compact list
65 -onlyfirst Only use the first value if metadata is repeated.
66 -recopt
67";
68}
69
70sub new {
71 my $class = shift (@_);
72 my $self = new BasClas($class, @_);
73
74 my ($metaname, $title, $removeprefix);
75 my $mingroup = 2;
76 my $minnesting = 20;
77 my $mincompact = 10;
78 my $maxcompact = 30;
79 my $doclevel = "top";
80 my $onlyfirst = 0;
81 my $freqsort = 0;
82 my $recopt = undef;
83
84 if (!parsargv::parse(\@_,
85 q^metadata/.*/^, \$metaname,
86 q^buttonname/.*/^, \$title,
87 q^removeprefix/.*/^, \$removeprefix,
88 q^mingroup/.*/2^, \$mingroup,
89 q^minnesting/.*/20^, \$minnesting,
90 q^mincompact/.*/10^, \$mincompact,
91 q^maxcompact/.*/30^, \$maxcompact,
92 q^doclevel/.*/top^, \$doclevel,
93 q^onlyfirst/.*/0^, \$onlyfirst,
94 q^freqsort/.*/0^, \$freqsort,
95 q^recopt/.*/-1^, \$recopt,
96
97 "allow_extra_options")) {
98
99 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
100 &print_usage();
101 die "\n";
102 }
103
104 if (!defined $metaname) {
105 my $outhandle = $self->{'outhandle'};
106 print $outhandle "AZCompactList used with no metadata name to classify by\n";
107 die "\n";
108 }
109
110 $title = $metaname unless ($title);
111
112 $self->{'list'} = {};
113 $self->{'listmetavalue'} = {};
114 $self->{'reclassify'} = {};
115 $self->{'reclassifylist'} = {};
116 $self->{'metaname'} = $metaname;
117 $self->{'title'} = "$title"; # title for the titlebar.
118 if (defined($removeprefix) && $removeprefix) {
119 $self->{'removeprefix'} = $removeprefix;
120 }
121 $self->{'mingroup'} = $mingroup;
122 $self->{'minnesting'} = $minnesting;
123 $self->{'mincompact'} = $mincompact;
124 $self->{'maxcompact'} = $maxcompact;
125 $self->{'doclevel'} = $doclevel;
126
127 if ($onlyfirst != 0) {
128 $onlyfirst = 1;
129 }
130 $self->{'onlyfirst'} = $onlyfirst;
131
132 if ($freqsort != 0) {
133 $freqsort = 1;
134 }
135 $self->{'freqsort'} = $freqsort;
136
137 if ($recopt == -1) {
138 $recopt = undef;
139 } else {
140 $recopt = "on";
141 }
142 $self->{'recopt'} = $recopt;
143
144 return bless $self, $class;
145}
146
147sub init
148{
149 my $self = shift (@_);
150
151 $self->{'list'} = {};
152 $self->{'listmetavalue'} = {};
153 $self->{'reclassify'} = {};
154 $self->{'reclassifylist'} = {};
155}
156
157$tmp = 0;
158
159sub classify
160{
161 my $self = shift (@_);
162 my ($doc_obj) = @_;
163
164 my $doc_OID = $doc_obj->get_OID();
165
166 my @sectionlist = ();
167 my $topsection = $doc_obj->get_top_section();
168
169 my $metaname = $self->{'metaname'};
170 my $outhandle = $self->{'outhandle'};
171
172 $metaname =~ s/(\/.*)//; # grab first name in n1/n2/n3 list
173
174 if ($self->{'doclevel'} =~ /^top(level)?/i)
175 {
176 push(@sectionlist,$topsection);
177 }
178 else
179 {
180 my $thissection = $doc_obj->get_next_section($topsection);
181 while (defined $thissection)
182 {
183 push(@sectionlist,$thissection);
184 $thissection = $doc_obj->get_next_section ($thissection);
185 }
186 }
187
188 my $thissection;
189 foreach $thissection (@sectionlist)
190 {
191 my $full_doc_OID
192 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
193
194 if (defined $self->{'list'}->{$full_doc_OID})
195 {
196 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
197 }
198 $self->{'list'}->{$full_doc_OID} = [];
199 $self->{'listmetavalue'}->{$full_doc_OID} = [];
200
201 my $metavalues = $doc_obj->get_metadata($thissection,$metaname);
202 my $metavalue;
203 foreach $metavalue (@$metavalues)
204 {
205 # if this document doesn't contain the metadata element we're
206 # sorting by we won't include it in this classification
207 if (defined $metavalue && $metavalue =~ /\w/)
208 {
209 if ($self->{'removeprefix'}) {
210 $metavalue =~ s/^$self->{'removeprefix'}//;
211 }
212
213 my $formatted_metavalue = $metavalue;
214
215 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
216 {
217 &sorttools::format_string_name_english (\$formatted_metavalue);
218 }
219 else
220 {
221 &sorttools::format_string_english (\$formatted_metavalue);
222 }
223
224 #### prefix-str
225 if (! defined($formatted_metavalue)) {
226 print $outhandle "Warning: AZCompactList: metavalue is ";
227 print $outhandle "empty\n";
228 $formatted_metavalue="";
229 }
230
231 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
232 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
233
234 last if ($self->{'onlyfirst'});
235 }
236 }
237 my $date = $doc_obj->get_metadata_element($thissection,"Date");
238 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$date];
239 }
240}
241
242sub reinit
243{
244 my ($self,$classlist_ref) = @_;
245 my $outhandle = $self->{'outhandle'};
246
247 my %mtfreq = ();
248 my @single_classlist = ();
249 my @multiple_classlist = ();
250
251 # find out how often each metavalue occurs
252 map
253 {
254 my $mv;
255 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
256 {
257 $mtfreq{$mv}++;
258 }
259 } @$classlist_ref;
260
261 # use this information to split the list: single metavalue/repeated value
262 map
263 {
264 my $i = 1;
265 my $metavalue;
266 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
267 {
268 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
269 {
270 push(@multiple_classlist,[$_,$i,$metavalue]);
271 }
272 else
273 {
274 push(@single_classlist,[$_,$metavalue]);
275 $metavalue =~ tr/[A-Z]/[a-z]/;
276 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
277 }
278 $i++;
279 }
280 } @$classlist_ref;
281
282
283 # Setup sub-classifiers for multiple list
284
285 $self->{'classifiers'} = {};
286
287 my $pm;
288 foreach $pm ("List", "SectionList")
289 {
290 my $listname
291 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
292 if (-e $listname) { require $listname; }
293 else
294 {
295 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
296 die "\n";
297 }
298 }
299
300 # Create classifiers objects for each entry >= mingroup
301 my $metavalue;
302 foreach $metavalue (keys %mtfreq)
303 {
304 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
305 {
306 my $listclassobj;
307 my $doclevel = $self->{'doclevel'};
308 my $metaname = $self->{'metaname'};
309 my @metaname_list = split('/',$metaname);
310 $metaname = shift(@metaname_list);
311 if (@metaname_list==0)
312 {
313 my @args;
314 push @args, ("-metadata", "$metaname");
315# buttonname is also used for the node's title
316 push @args, ("-buttonname", "$metavalue");
317 push @args, ("-sort", "Date");
318
319 if ($doclevel =~ m/^top(level)?/i)
320 {
321 eval ("\$listclassobj = new List(\@args)"); warn $@ if $@;
322 }
323 else
324 {
325 eval ("\$listclassobj = new SectionList(\@args)");
326 }
327 }
328 else
329 {
330 $metaname = join('/',@metaname_list);
331
332 my @args;
333 push @args, ("-metadata", "$metaname");
334# buttonname is also used for the node's title
335 push @args, ("-buttonname", "$metavalue");
336 push @args, ("-doclevel", "$doclevel");
337 push @args, "-recopt";
338
339 eval ("\$listclassobj = new AZCompactList(\@args)");
340 }
341 if ($@) {
342 print $outhandle "$@";
343 die "\n";
344 }
345
346 $listclassobj->init();
347
348 if (defined $metavalue && $metavalue =~ /\w/)
349 {
350 my $formatted_node = $metavalue;
351
352 if ($self->{'removeprefix'}) {
353 $formatted_node =~ s/^$self->{'removeprefix'}//;
354 }
355
356 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
357 {
358 &sorttools::format_string_name_english(\$formatted_node);
359 }
360 else
361 {
362 &sorttools::format_string_english(\$formatted_node);
363 }
364
365 # In case our formatted string is empty...
366 if (! defined($formatted_node)) {
367 print $outhandle "Warning: AZCompactList: metavalue is ";
368 print $outhandle "empty\n";
369 $formatted_node="";
370 }
371
372 $self->{'classifiers'}->{$metavalue}
373 = { 'classifyobj' => $listclassobj,
374 'formattednode' => $formatted_node };
375 }
376 }
377 }
378
379
380 return (\@single_classlist,\@multiple_classlist);
381}
382
383
384sub reclassify
385{
386 my ($self,$multiple_cl_ref) = @_;
387
388 # Entries in the current classify list that are "book nodes"
389 # should be recursively classified.
390 #--
391 foreach $dm_pair (@$multiple_cl_ref)
392 {
393 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
394 my $listclassobj;
395
396 # find metavalue in list of sub-classifiers
397 my $found = 0;
398 my $node_name;
399 foreach $node_name (keys %{$self->{'classifiers'}})
400 {
401 $resafe_node_name = $node_name;
402 $resafe_node_name =~ s/(\(|\)|\[|\]|\{|\}|\^|\$|\.|\+|\*|\?|\|)/\\$1/g;
403 if ($metavalue =~ m/^$resafe_node_name$/i)
404 {
405 my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
406
407 ## date appears to not be used in classifier call ####
408
409 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
410 {
411 my $section=$1;
412 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
413 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
414 ->classify($doc_obj, "Section=$section");
415 } else { # section level
416 # Thanks to Don Gourley for this...
417 # classify can't handle multi-level section
418 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
419 ->classify_section($section, $doc_obj, $date);
420 }
421 }
422 else
423 {
424 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
425 ->classify($doc_obj);
426 }
427
428 $found = 1;
429 last;
430 }
431 }
432
433 if (!$found)
434 {
435 my $outhandle=$self->{outhandle};
436 print $outhandle "Warning: AZCompactList::reclassify ";
437 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
438 }
439 }
440}
441
442
443
444sub get_reclassify_info
445{
446 my $self = shift (@_);
447
448 my $node_name;
449 foreach $node_name (keys %{$self->{'classifiers'}})
450 {
451 my $classifyinfo
452 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
453 ->get_classify_info();
454 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
455 = $classifyinfo;
456 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
457 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
458 }
459}
460
461
462sub alpha_numeric_cmp
463{
464 my ($self,$a,$b) = @_;
465
466 my $title_a = $self->{'reclassifylist'}->{$a};
467 my $title_b = $self->{'reclassifylist'}->{$b};
468
469 if ($title_a =~ m/^(\d+(\.\d+)?)/)
470 {
471 my $val_a = $1;
472 if ($title_b =~ m/^(\d+(\.\d+)?)/)
473 {
474 my $val_b = $1;
475 if ($val_a != $val_b)
476 {
477 return ($val_a <=> $val_b);
478 }
479 }
480 }
481
482 return ($title_a cmp $title_b);
483}
484
485sub frequency_cmp
486{
487 my ($self,$a,$b) = @_;
488
489
490 my $title_a = $self->{'reclassifylist'}->{$a};
491 my $title_b = $self->{'reclassifylist'}->{$b};
492
493 print STDERR "*** title a = $title_a\n";
494 print STDERR "*** title b = $title_b\n";
495
496 my $a_freq = 1;
497 my $b_freq = 1;
498
499 if ($a =~ m/^CLASSIFY\.(.*)$/)
500 {
501 my $a_node = $1;
502 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
503 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
504 }
505
506 if ($b =~ m/^CLASSIFY\.(.*)$/)
507 {
508 my $b_node = $1;
509 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
510 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
511 }
512
513## print STDERR "*** comparing $a_freq <-> $b_freq\n";
514
515 return $b_freq <=> $a_freq;
516}
517
518sub get_classify_info {
519 my $self = shift (@_);
520
521 my @classlist =keys %{$self->{'list'}}; # list all doc oids
522
523 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
524 $self->reclassify($multiple_cl_ref);
525 $self->get_reclassify_info();
526
527 my @reclassified_classlist;
528 if ($self->{'freqsort'})
529 {
530## print STDERR "***** Away to start freq sort\n";
531
532 @reclassified_classlist
533 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
534 # supress sub-grouping by alphabet
535 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
536
537 }
538 else
539 {
540# @reclassified_classlist
541# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
542
543 # alpha_numeric_cmp is slower but handles numbers better ...
544
545 @reclassified_classlist
546 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
547
548 }
549
550 return $self->splitlist (\@reclassified_classlist);
551}
552
553sub get_entry {
554 my $self = shift (@_);
555 my ($title, $childtype, $metaname, $thistype) = @_;
556
557 # organise into classification structure
558 my %classifyinfo = ('childtype'=>$childtype,
559 'Title'=>$title,
560 'contains'=>[],
561 'mdtype'=>$metaname);
562
563 $classifyinfo{'thistype'} = $thistype
564 if defined $thistype && $thistype =~ /\w/;
565
566 return \%classifyinfo;
567}
568
569
570
571# splitlist takes an ordered list of classifications (@$classlistref) and
572# splits it up into alphabetical sub-sections.
573sub splitlist {
574 my $self = shift (@_);
575 my ($classlistref) = @_;
576 my $classhash = {};
577
578 # top level
579 my @metanames = split("/",$self->{'metaname'});
580 my $metaname = shift(@metanames);
581
582 my $childtype = "HList";
583 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
584
585 my $classifyinfo;
586 if (!defined($self->{'recopt'}))
587 {
588 my $title = $self->{'title'}; # should always be defined by now....
589 $title = $metaname unless defined $title;
590 $classifyinfo
591 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
592 }
593 else
594 {
595 my $title = $self->{'title'};
596 $classifyinfo
597 = $self->get_entry ($title, $childtype, $metaname, "VList");
598 }
599
600 # don't need to do any splitting if there are less than 'minnesting' classifications
601 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
602 foreach $subOID (@$classlistref) {
603 if ($subOID =~ /^CLASSIFY\.(.*)$/
604 && defined $self->{'classifiers'}->{$1})
605 {
606### print STDERR "*** subOID = $subOID\n";
607
608 push (@{$classifyinfo->{'contains'}},
609 $self->{'classifiers'}->{$1}->{'classifyinfo'});
610 }
611 else
612 {
613 $subOID =~ s/^Metavalue_(\d+)\.//;
614 my $metaname_offset = $1 -1;
615 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
616 push (@{$classifyinfo->{'contains'}}, $oid_rec);
617 }
618 }
619 return $classifyinfo;
620 }
621
622 # first split up the list into separate A-Z and 0-9 classifications
623 foreach $classification (@$classlistref) {
624 my $title = $self->{'reclassifylist'}->{$classification};
625 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
626### $title =~ s/^\s+//g; # remove a leading spaces
627### $title =~ s/^_+//g; # remove a leading underscores
628 $title =~ s/^\W+//g;
629### $title =~ s/^(\'|\`|\"|\:|\()//g; # remove any opening punctutation
630
631# only want first character for classification
632 $title =~ m/^(.)/; $title=$1;
633 $title =~ tr/[a-z]/[A-Z]/;
634
635 if ($title =~ /^[0-9]$/) {$title = '0-9';}
636 elsif ($title !~ /^[A-Z]$/) {
637 my $outhandle = $self->{'outhandle'};
638 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
639 }
640 $classhash->{$title} = [] unless defined $classhash->{$title};
641 push (@{$classhash->{$title}}, $classification);
642 }
643 $classhash = $self->compactlist ($classhash);
644
645 my @tmparr = ();
646 foreach $subsection (sort keys (%$classhash)) {
647 push (@tmparr, $subsection);
648 }
649
650 # if there's a 0-9 section it will have been sorted to the beginning
651 # but we want it at the end
652 if ($tmparr[0] eq '0-9') {
653 shift @tmparr;
654 push (@tmparr, '0-9');
655 }
656
657 foreach $subclass (@tmparr)
658 {
659 my $tempclassify
660 = (scalar(@tmparr)==1)
661 ? ($self->get_entry(" ", "VList", $metaname))
662 : ($self->get_entry($subclass, "VList", $metaname));
663
664
665 foreach $subsubOID (@{$classhash->{$subclass}})
666 {
667 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
668 && defined $self->{'classifiers'}->{$1})
669 {
670 push (@{$tempclassify->{'contains'}},
671 $self->{'classifiers'}->{$1}->{'classifyinfo'});
672 }
673 else
674 {
675 $subsubOID =~ s/^Metavalue_(\d+)\.//;
676 my $metaname_offset = $1 -1;
677 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
678 push (@{$tempclassify->{'contains'}}, $oid_rec);
679 }
680 }
681 push (@{$classifyinfo->{'contains'}}, $tempclassify);
682 }
683
684 return $classifyinfo;
685}
686
687sub compactlist {
688 my $self = shift (@_);
689 my ($classhashref) = @_;
690 my $compactedhash = {};
691 my @currentOIDs = ();
692 my $currentfirstletter = "";
693 my $currentlastletter = "";
694 my $lastkey = "";
695
696 # minimum and maximum documents to be displayed per page.
697 # the actual maximum will be max + (min-1).
698 # the smallest sub-section is a single letter at present
699 # so in this case there may be many times max documents
700 # displayed on a page.
701 my $min = $self->{'mincompact'};
702 my $max = $self->{'maxcompact'};
703
704 foreach $subsection (sort keys %$classhashref) {
705 if ($subsection eq '0-9') {
706 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
707 next;
708 }
709 $currentfirstletter = $subsection if $currentfirstletter eq "";
710 if ((scalar (@currentOIDs) < $min) ||
711 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
712 push (@currentOIDs, @{$classhashref->{$subsection}});
713 $currentlastletter = $subsection;
714 } else {
715
716 if ($currentfirstletter eq $currentlastletter) {
717 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
718 $lastkey = $currentfirstletter;
719 } else {
720 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
721 $lastkey = "$currentfirstletter-$currentlastletter";
722 }
723 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
724 $compactedhash->{$subsection} = $classhashref->{$subsection};
725 @currentOIDs = ();
726 $currentfirstletter = "";
727 } else {
728 @currentOIDs = @{$classhashref->{$subsection}};
729 $currentfirstletter = $subsection;
730 $currentlastletter = $subsection;
731 }
732 }
733 }
734
735 # add final OIDs to last sub-classification if there aren't many otherwise
736 # add final sub-classification
737 if (scalar (@currentOIDs) < $min) {
738 my ($newkey) = $lastkey =~ /^(.)/;
739 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
740 delete $compactedhash->{$lastkey};
741 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
742 } else {
743 if ($currentfirstletter eq $currentlastletter) {
744 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
745 } else {
746 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
747 }
748 }
749
750 return $compactedhash;
751}
752
7531;
754
755
Note: See TracBrowser for help on using the repository browser.