source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 3540

Last change on this file since 3540 was 3540, checked in by kjdon, 22 years ago

added John T's changes into CVS - added info to enable retrieval of usage info in xml

  • Property svn:keywords set to Author Date Id Revision
File size: 21.8 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28#
29# metadata=Metaname -- all documents with Metaname metadata
30# will be included in list, list will be sorted
31# by this element.
32# buttonname=Title -- (optional) the title field for this classification.
33# if not included title field will be Metaname.
34# mingroup=Num -- (optional) the smallest value that will cause
35# a group in the hierarchy to form.
36# minnesting=Num -- (optional) the smallest value that will cause a
37# list to converted into nested list
38# mincompact=Num -- (optional) used in compact list
39# maxcompact=Num -- (optional) used in compact list
40# doclevel=top|section -- (optional) level to process document at.
41# onlyfirst -- (optional) control whether all or only first
42# metadata value used from array of metadata
43package AZCompactList;
44
45use BasClas;
46use sorttools;
47
48sub BEGIN {
49 @ISA = ('BasClas');
50}
51
52my $doclevel_list =
53[ { 'name' => "top",
54 'desc' => "Whole document." } ,
55{ 'name' => "section",
56 'desc' => "By sections." }
57];
58
59my $arguments =
60[ { 'name' => "metadata",
61 'desc' => "Metadata field used for classification. List will be sorted by this element.",
62 'type' => "metadata",
63 'reqd' => "yes" } ,
64{ 'name' => "buttonname",
65 'desc' => "Button name for this classification. Defaults to metadata name.",
66 'type' => "string",
67 'reqd' => "no" } ,
68{ 'name' => "mingroup",
69 'desc' => "The smallest value that will cause a group in the hierarchy to form.",
70 'type' => "int",
71 'reqd' => "no" } ,
72{ 'name' => "minnesting",
73 'desc' => "The smallest value that will cause a list to converted into nested list.",
74 'type' => "int",
75 'reqd' => "no" } ,
76{ 'name' => "mincompact",
77 'desc' => "Used in compact list.",
78 'type' => "int",
79 'reqd' => "no" } ,
80{ 'name' => "maxcompact",
81 'desc' => "Used in compact list.",
82 'type' => "int",
83 'reqd' => "no" } ,
84{ 'name' => "doclevel",
85 'desc' => "Level to process document at.",
86 'type' => "enum",
87 'list' => $doclevel_list,
88 'reqd' => "no" } ,
89{ 'name' => "onlyfirst",
90 'desc' => "Control whether all or only first metadata value used from array of metadata.",
91 'type' => "flag",
92 'reqd' => "no" }
93];
94
95my $options =
96{ 'name' => "AZCompactList",
97 'desc' => "Classifier plugin for sorting alphabetically",
98 'inherits' => "Yes",
99 'args' => $arguments };
100
101sub print_usage {
102 print STDERR "
103 usage: classify AZCompactList -metadata X [options]
104 options:
105 -metadata X (required) Metadata field used for classification
106 -buttonname X Title to use on web pages (defaults to metadata)
107 -removeprefix regex pattern to remove from metadata before sorting
108 -doclevel top|section (Defaults to top)
109 -freqsort Sort by node frequency rather than alpha-numeric
110 -mingroup N Minimum num of documents required to form a new group
111 -minnesting N Minimum list size to become a nested list
112 -mincompact N Used in compact list
113 -maxcompact N Used in compact list
114 -onlyfirst Only use the first value if metadata is repeated.
115 -recopt
116";
117}
118
119sub new {
120 my $class = shift (@_);
121 my $self = new BasClas($class, @_);
122
123 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
124 my $option_list = $self->{'option_list'};
125 push( @{$option_list}, $options );
126
127
128 my ($metaname, $title, $removeprefix);
129 my $mingroup = 2;
130 my $minnesting = 20;
131 my $mincompact = 10;
132 my $maxcompact = 30;
133 my $doclevel = "top";
134 my $onlyfirst = 0;
135 my $freqsort = 0;
136 my $recopt = undef;
137
138 if (!parsargv::parse(\@_,
139 q^metadata/.*/^, \$metaname,
140 q^buttonname/.*/^, \$title,
141 q^removeprefix/.*/^, \$removeprefix,
142 q^mingroup/.*/2^, \$mingroup,
143 q^minnesting/.*/20^, \$minnesting,
144 q^mincompact/.*/10^, \$mincompact,
145 q^maxcompact/.*/30^, \$maxcompact,
146 q^doclevel/.*/top^, \$doclevel,
147 q^onlyfirst/.*/0^, \$onlyfirst,
148 q^freqsort/.*/0^, \$freqsort,
149 q^recopt/.*/-1^, \$recopt,
150
151 "allow_extra_options")) {
152
153 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
154 &print_usage();
155 die "\n";
156 }
157
158 if (!defined $metaname) {
159 my $outhandle = $self->{'outhandle'};
160 print $outhandle "AZCompactList used with no metadata name to classify by\n";
161 die "\n";
162 }
163
164 $title = $metaname unless ($title);
165
166 $self->{'list'} = {};
167 $self->{'listmetavalue'} = {};
168 $self->{'reclassify'} = {};
169 $self->{'reclassifylist'} = {};
170 $self->{'metaname'} = $metaname;
171 $self->{'title'} = "$title"; # title for the titlebar.
172 if (defined($removeprefix) && $removeprefix) {
173 $self->{'removeprefix'} = $removeprefix;
174 }
175 $self->{'mingroup'} = $mingroup;
176 $self->{'minnesting'} = $minnesting;
177 $self->{'mincompact'} = $mincompact;
178 $self->{'maxcompact'} = $maxcompact;
179 $self->{'doclevel'} = $doclevel;
180
181 if ($onlyfirst != 0) {
182 $onlyfirst = 1;
183 }
184 $self->{'onlyfirst'} = $onlyfirst;
185
186 if ($freqsort != 0) {
187 $freqsort = 1;
188 }
189 $self->{'freqsort'} = $freqsort;
190
191 if ($recopt == -1) {
192 $recopt = undef;
193 } else {
194 $recopt = "on";
195 }
196 $self->{'recopt'} = $recopt;
197
198 return bless $self, $class;
199}
200
201sub init
202{
203 my $self = shift (@_);
204
205 $self->{'list'} = {};
206 $self->{'listmetavalue'} = {};
207 $self->{'reclassify'} = {};
208 $self->{'reclassifylist'} = {};
209}
210
211$tmp = 0;
212
213sub classify
214{
215 my $self = shift (@_);
216 my ($doc_obj) = @_;
217
218 my $doc_OID = $doc_obj->get_OID();
219
220 my @sectionlist = ();
221 my $topsection = $doc_obj->get_top_section();
222
223 my $metaname = $self->{'metaname'};
224 my $outhandle = $self->{'outhandle'};
225
226 $metaname =~ s/(\/.*)//; # grab first name in n1/n2/n3 list
227
228 if ($self->{'doclevel'} =~ /^top(level)?/i)
229 {
230 push(@sectionlist,$topsection);
231 }
232 else
233 {
234 my $thissection = $doc_obj->get_next_section($topsection);
235 while (defined $thissection)
236 {
237 push(@sectionlist,$thissection);
238 $thissection = $doc_obj->get_next_section ($thissection);
239 }
240 }
241
242 my $thissection;
243 foreach $thissection (@sectionlist)
244 {
245 my $full_doc_OID
246 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
247
248 if (defined $self->{'list'}->{$full_doc_OID})
249 {
250 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
251 }
252 $self->{'list'}->{$full_doc_OID} = [];
253 $self->{'listmetavalue'}->{$full_doc_OID} = [];
254
255 my $metavalues = $doc_obj->get_metadata($thissection,$metaname);
256 my $metavalue;
257 foreach $metavalue (@$metavalues)
258 {
259 # if this document doesn't contain the metadata element we're
260 # sorting by we won't include it in this classification
261 if (defined $metavalue && $metavalue =~ /\w/)
262 {
263 if ($self->{'removeprefix'}) {
264 $metavalue =~ s/^$self->{'removeprefix'}//;
265 }
266
267 my $formatted_metavalue = $metavalue;
268
269 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
270 {
271 &sorttools::format_string_name_english (\$formatted_metavalue);
272 }
273 else
274 {
275 &sorttools::format_string_english (\$formatted_metavalue);
276 }
277
278 #### prefix-str
279 if (! defined($formatted_metavalue)) {
280 print $outhandle "Warning: AZCompactList: metavalue is ";
281 print $outhandle "empty\n";
282 $formatted_metavalue="";
283 }
284
285 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
286 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
287
288 last if ($self->{'onlyfirst'});
289 }
290 }
291 my $date = $doc_obj->get_metadata_element($thissection,"Date");
292 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$date];
293 }
294}
295
296sub reinit
297{
298 my ($self,$classlist_ref) = @_;
299 my $outhandle = $self->{'outhandle'};
300
301 my %mtfreq = ();
302 my @single_classlist = ();
303 my @multiple_classlist = ();
304
305 # find out how often each metavalue occurs
306 map
307 {
308 my $mv;
309 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
310 {
311 $mtfreq{$mv}++;
312 }
313 } @$classlist_ref;
314
315 # use this information to split the list: single metavalue/repeated value
316 map
317 {
318 my $i = 1;
319 my $metavalue;
320 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
321 {
322 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
323 {
324 push(@multiple_classlist,[$_,$i,$metavalue]);
325 }
326 else
327 {
328 push(@single_classlist,[$_,$metavalue]);
329 $metavalue =~ tr/[A-Z]/[a-z]/;
330 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
331 }
332 $i++;
333 }
334 } @$classlist_ref;
335
336
337 # Setup sub-classifiers for multiple list
338
339 $self->{'classifiers'} = {};
340
341 my $pm;
342 foreach $pm ("List", "SectionList")
343 {
344 my $listname
345 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
346 if (-e $listname) { require $listname; }
347 else
348 {
349 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
350 die "\n";
351 }
352 }
353
354 # Create classifiers objects for each entry >= mingroup
355 my $metavalue;
356 foreach $metavalue (keys %mtfreq)
357 {
358 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
359 {
360 my $listclassobj;
361 my $doclevel = $self->{'doclevel'};
362 my $metaname = $self->{'metaname'};
363 my @metaname_list = split('/',$metaname);
364 $metaname = shift(@metaname_list);
365 if (@metaname_list==0)
366 {
367 my @args;
368 push @args, ("-metadata", "$metaname");
369# buttonname is also used for the node's title
370 push @args, ("-buttonname", "$metavalue");
371 push @args, ("-sort", "Date");
372
373 if ($doclevel =~ m/^top(level)?/i)
374 {
375 eval ("\$listclassobj = new List(\@args)"); warn $@ if $@;
376 }
377 else
378 {
379 eval ("\$listclassobj = new SectionList(\@args)");
380 }
381 }
382 else
383 {
384 $metaname = join('/',@metaname_list);
385
386 my @args;
387 push @args, ("-metadata", "$metaname");
388# buttonname is also used for the node's title
389 push @args, ("-buttonname", "$metavalue");
390 push @args, ("-doclevel", "$doclevel");
391 push @args, "-recopt";
392
393 eval ("\$listclassobj = new AZCompactList(\@args)");
394 }
395 if ($@) {
396 print $outhandle "$@";
397 die "\n";
398 }
399
400 $listclassobj->init();
401
402 if (defined $metavalue && $metavalue =~ /\w/)
403 {
404 my $formatted_node = $metavalue;
405
406 if ($self->{'removeprefix'}) {
407 $formatted_node =~ s/^$self->{'removeprefix'}//;
408 }
409
410 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
411 {
412 &sorttools::format_string_name_english(\$formatted_node);
413 }
414 else
415 {
416 &sorttools::format_string_english(\$formatted_node);
417 }
418
419 # In case our formatted string is empty...
420 if (! defined($formatted_node)) {
421 print $outhandle "Warning: AZCompactList: metavalue is ";
422 print $outhandle "empty\n";
423 $formatted_node="";
424 }
425
426 $self->{'classifiers'}->{$metavalue}
427 = { 'classifyobj' => $listclassobj,
428 'formattednode' => $formatted_node };
429 }
430 }
431 }
432
433
434 return (\@single_classlist,\@multiple_classlist);
435}
436
437
438sub reclassify
439{
440 my ($self,$multiple_cl_ref) = @_;
441
442 # Entries in the current classify list that are "book nodes"
443 # should be recursively classified.
444 #--
445 foreach $dm_pair (@$multiple_cl_ref)
446 {
447 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
448 my $listclassobj;
449
450 # find metavalue in list of sub-classifiers
451 my $found = 0;
452 my $node_name;
453 foreach $node_name (keys %{$self->{'classifiers'}})
454 {
455 $resafe_node_name = $node_name;
456 # escape chars that mean something to perl...
457 $resafe_node_name =~ s/([\\\(\)\[\]\{\}\^\$\.\+\*\?\|])/\\$1/g;
458 if ($metavalue =~ m/^$resafe_node_name$/i)
459 {
460 my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
461
462 ## date appears to not be used in classifier call ####
463
464 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
465 {
466 my $section=$1;
467 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
468 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
469 ->classify($doc_obj, "Section=$section");
470 } else { # section level
471 # Thanks to Don Gourley for this...
472 # classify can't handle multi-level section
473 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
474 ->classify_section($section, $doc_obj, $date);
475 }
476 }
477 else
478 {
479 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
480 ->classify($doc_obj);
481 }
482
483 $found = 1;
484 last;
485 }
486 }
487
488 if (!$found)
489 {
490 my $outhandle=$self->{outhandle};
491 print $outhandle "Warning: AZCompactList::reclassify ";
492 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
493 }
494 }
495}
496
497
498
499sub get_reclassify_info
500{
501 my $self = shift (@_);
502
503 my $node_name;
504 foreach $node_name (keys %{$self->{'classifiers'}})
505 {
506 my $classifyinfo
507 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
508 ->get_classify_info();
509 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
510 = $classifyinfo;
511 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
512 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
513 }
514}
515
516
517sub alpha_numeric_cmp
518{
519 my ($self,$a,$b) = @_;
520
521 my $title_a = $self->{'reclassifylist'}->{$a};
522 my $title_b = $self->{'reclassifylist'}->{$b};
523
524 if ($title_a =~ m/^(\d+(\.\d+)?)/)
525 {
526 my $val_a = $1;
527 if ($title_b =~ m/^(\d+(\.\d+)?)/)
528 {
529 my $val_b = $1;
530 if ($val_a != $val_b)
531 {
532 return ($val_a <=> $val_b);
533 }
534 }
535 }
536
537 return ($title_a cmp $title_b);
538}
539
540sub frequency_cmp
541{
542 my ($self,$a,$b) = @_;
543
544
545 my $title_a = $self->{'reclassifylist'}->{$a};
546 my $title_b = $self->{'reclassifylist'}->{$b};
547
548 my $a_freq = 1;
549 my $b_freq = 1;
550
551 if ($a =~ m/^CLASSIFY\.(.*)$/)
552 {
553 my $a_node = $1;
554 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
555 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
556 }
557
558 if ($b =~ m/^CLASSIFY\.(.*)$/)
559 {
560 my $b_node = $1;
561 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
562 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
563 }
564
565 return $b_freq <=> $a_freq;
566}
567
568sub get_classify_info {
569 my $self = shift (@_);
570
571 my @classlist =keys %{$self->{'list'}}; # list all doc oids
572
573 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
574 $self->reclassify($multiple_cl_ref);
575 $self->get_reclassify_info();
576
577 my @reclassified_classlist;
578 if ($self->{'freqsort'})
579 {
580 @reclassified_classlist
581 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
582 # supress sub-grouping by alphabet
583 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
584 }
585 else
586 {
587# @reclassified_classlist
588# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
589
590 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
591
592 @reclassified_classlist
593 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
594
595 }
596
597 return $self->splitlist (\@reclassified_classlist);
598}
599
600sub get_entry {
601 my $self = shift (@_);
602 my ($title, $childtype, $metaname, $thistype) = @_;
603
604 # organise into classification structure
605 my %classifyinfo = ('childtype'=>$childtype,
606 'Title'=>$title,
607 'contains'=>[],
608 'mdtype'=>$metaname);
609
610 $classifyinfo{'thistype'} = $thistype
611 if defined $thistype && $thistype =~ /\w/;
612
613 return \%classifyinfo;
614}
615
616
617
618# splitlist takes an ordered list of classifications (@$classlistref) and
619# splits it up into alphabetical sub-sections.
620sub splitlist {
621 my $self = shift (@_);
622 my ($classlistref) = @_;
623 my $classhash = {};
624
625 # top level
626 my @metanames = split("/",$self->{'metaname'});
627 my $metaname = shift(@metanames);
628
629 my $childtype = "HList";
630 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
631
632 my $classifyinfo;
633 if (!defined($self->{'recopt'}))
634 {
635 my $title = $self->{'title'}; # should always be defined by now....
636 $title = $metaname unless defined $title;
637 $classifyinfo
638 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
639 }
640 else
641 {
642 my $title = $self->{'title'};
643 $classifyinfo
644 = $self->get_entry ($title, $childtype, $metaname, "VList");
645 }
646
647 # don't need to do any splitting if there are less than 'minnesting' classifications
648 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
649 foreach $subOID (@$classlistref) {
650 if ($subOID =~ /^CLASSIFY\.(.*)$/
651 && defined $self->{'classifiers'}->{$1})
652 {
653 push (@{$classifyinfo->{'contains'}},
654 $self->{'classifiers'}->{$1}->{'classifyinfo'});
655 }
656 else
657 {
658 $subOID =~ s/^Metavalue_(\d+)\.//;
659 my $metaname_offset = $1 -1;
660 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
661 push (@{$classifyinfo->{'contains'}}, $oid_rec);
662 }
663 }
664 return $classifyinfo;
665 }
666
667 # first split up the list into separate A-Z and 0-9 classifications
668 foreach $classification (@$classlistref) {
669 my $title = $self->{'reclassifylist'}->{$classification};
670 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
671### $title =~ s/^\s+//g; # remove a leading spaces
672### $title =~ s/^_+//g; # remove a leading underscores
673 $title =~ s/^\W+//g;
674### $title =~ s/^(\'|\`|\"|\:|\()//g; # remove any opening punctutation
675
676# only want first character for classification
677 $title =~ m/^(.)/; $title=$1;
678 $title =~ tr/[a-z]/[A-Z]/;
679
680 if ($title =~ /^[0-9]$/) {$title = '0-9';}
681 elsif ($title !~ /^[A-Z]$/) {
682 my $outhandle = $self->{'outhandle'};
683 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
684 }
685 $classhash->{$title} = [] unless defined $classhash->{$title};
686 push (@{$classhash->{$title}}, $classification);
687 }
688 $classhash = $self->compactlist ($classhash);
689
690 my @tmparr = ();
691 foreach $subsection (sort keys (%$classhash)) {
692 push (@tmparr, $subsection);
693 }
694
695 # if there's a 0-9 section it will have been sorted to the beginning
696 # but we want it at the end
697 if ($tmparr[0] eq '0-9') {
698 shift @tmparr;
699 push (@tmparr, '0-9');
700 }
701
702 foreach $subclass (@tmparr)
703 {
704 my $tempclassify
705 = (scalar(@tmparr)==1)
706 ? ($self->get_entry(" ", "VList", $metaname))
707 : ($self->get_entry($subclass, "VList", $metaname));
708
709
710 foreach $subsubOID (@{$classhash->{$subclass}})
711 {
712 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
713 && defined $self->{'classifiers'}->{$1})
714 {
715 push (@{$tempclassify->{'contains'}},
716 $self->{'classifiers'}->{$1}->{'classifyinfo'});
717 }
718 else
719 {
720 $subsubOID =~ s/^Metavalue_(\d+)\.//;
721 my $metaname_offset = $1 -1;
722 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
723 push (@{$tempclassify->{'contains'}}, $oid_rec);
724 }
725 }
726 push (@{$classifyinfo->{'contains'}}, $tempclassify);
727 }
728
729 return $classifyinfo;
730}
731
732sub compactlist {
733 my $self = shift (@_);
734 my ($classhashref) = @_;
735 my $compactedhash = {};
736 my @currentOIDs = ();
737 my $currentfirstletter = "";
738 my $currentlastletter = "";
739 my $lastkey = "";
740
741 # minimum and maximum documents to be displayed per page.
742 # the actual maximum will be max + (min-1).
743 # the smallest sub-section is a single letter at present
744 # so in this case there may be many times max documents
745 # displayed on a page.
746 my $min = $self->{'mincompact'};
747 my $max = $self->{'maxcompact'};
748
749 foreach $subsection (sort keys %$classhashref) {
750 if ($subsection eq '0-9') {
751 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
752 next;
753 }
754 $currentfirstletter = $subsection if $currentfirstletter eq "";
755 if ((scalar (@currentOIDs) < $min) ||
756 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
757 push (@currentOIDs, @{$classhashref->{$subsection}});
758 $currentlastletter = $subsection;
759 } else {
760
761 if ($currentfirstletter eq $currentlastletter) {
762 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
763 $lastkey = $currentfirstletter;
764 } else {
765 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
766 $lastkey = "$currentfirstletter-$currentlastletter";
767 }
768 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
769 $compactedhash->{$subsection} = $classhashref->{$subsection};
770 @currentOIDs = ();
771 $currentfirstletter = "";
772 } else {
773 @currentOIDs = @{$classhashref->{$subsection}};
774 $currentfirstletter = $subsection;
775 $currentlastletter = $subsection;
776 }
777 }
778 }
779
780 # add final OIDs to last sub-classification if there aren't many otherwise
781 # add final sub-classification
782 if (scalar (@currentOIDs) < $min) {
783 my ($newkey) = $lastkey =~ /^(.)/;
784 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
785 delete $compactedhash->{$lastkey};
786 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
787 } else {
788 if ($currentfirstletter eq $currentlastletter) {
789 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
790 } else {
791 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
792 }
793 }
794
795 return $compactedhash;
796}
797
7981;
799
800
Note: See TracBrowser for help on using the repository browser.