source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 4778

Last change on this file since 4778 was 4759, checked in by mdewsnip, 21 years ago

Tidied up 'options' and 'arguments' structures (representing the options of the plugin) in preparation for removing the print_usage() routines.

  • Property svn:keywords set to Author Date Id Revision
File size: 22.5 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28#
29# metadata=Metaname -- all documents with Metaname metadata
30# will be included in list, list will be sorted
31# by this element.
32# buttonname=Title -- (optional) the title field for this classification.
33# if not included title field will be Metaname.
34# mingroup=Num -- (optional) the smallest value that will cause
35# a group in the hierarchy to form.
36# minnesting=Num -- (optional) the smallest value that will cause a
37# list to converted into nested list
38# mincompact=Num -- (optional) used in compact list
39# maxcompact=Num -- (optional) used in compact list
40# doclevel=top|section -- (optional) level to process document at.
41# onlyfirst -- (optional) control whether all or only first
42# metadata value used from array of metadata
43package AZCompactList;
44
45use BasClas;
46use sorttools;
47
48sub BEGIN {
49 @ISA = ('BasClas');
50}
51
52my $doclevel_list =
53 [ { 'name' => "top",
54 'desc' => "Whole document." } ,
55 { 'name' => "section",
56 'desc' => "By sections." } ];
57
58my $arguments =
59 [ { 'name' => "metadata",
60 'desc' => "Metadata field used for classification. List will be sorted by this element.",
61 'type' => "metadata",
62 'reqd' => "yes" },
63 { 'name' => "buttonname",
64 'desc' => "Button name for this classification.",
65 'type' => "string",
66 'deft' => "Metadata element specified with -metadata",
67 'reqd' => "no" },
68 { 'name' => "removeprefix",
69 'desc' => "A prefix to ignore in the Metadata values for the field when sorting.",
70 'type' => "string",
71 'deft' => "",
72 'reqd' => "no" },
73 { 'name' => "mingroup",
74 'desc' => "The smallest value that will cause a group in the hierarchy to form.",
75 'type' => "int",
76 'deft' => "2",
77 'reqd' => "no" },
78 { 'name' => "minnesting",
79 'desc' => "The smallest value that will cause a list to converted into nested list.",
80 'type' => "int",
81 'deft' => "20",
82 'reqd' => "no" },
83 { 'name' => "mincompact",
84 'desc' => "Minimum number of documents to be displayed per page.",
85 'type' => "int",
86 'deft' => "10",
87 'reqd' => "no" },
88 { 'name' => "maxcompact",
89 'desc' => "Maximum number of documents to be displayed per page.",
90 'type' => "int",
91 'deft' => "30",
92 'reqd' => "no" },
93 { 'name' => "doclevel",
94 'desc' => "Level to process document at.",
95 'type' => "enum",
96 'list' => $doclevel_list,
97 'deft' => "top",
98 'reqd' => "no" },
99 { 'name' => "onlyfirst",
100 'desc' => "Control whether all or only first metadata value used from array of metadata.",
101 'type' => "flag",
102 'reqd' => "no" },
103 { 'name' => "freqsort",
104 'desc' => "Sort by node frequency rather than alpha-numeric.",
105 'type' => "flag"},
106 { 'name' => "recopt",
107 'desc' => "?",
108 'type' => "string",
109 'deft' => "" } ];
110
111my $options =
112{ 'name' => "AZCompactList",
113 'desc' => "Classifier plugin for sorting alphabetically",
114 'inherits' => "Yes",
115 'args' => $arguments };
116
117sub print_usage {
118 print STDERR "
119 usage: classify AZCompactList -metadata X [options]
120 options:
121 -metadata X (required) Metadata field used for classification
122 -buttonname X Title to use on web pages (defaults to metadata)
123 -removeprefix regex pattern to remove from metadata before sorting
124 -doclevel top|section (Defaults to top)
125 -freqsort Sort by node frequency rather than alpha-numeric
126 -mingroup N Minimum num of documents required to form a new group
127 -minnesting N Minimum list size to become a nested list
128 -mincompact N Used in compact list
129 -maxcompact N Used in compact list
130 -onlyfirst Only use the first value if metadata is repeated.
131 -recopt
132";
133}
134
135sub new {
136 my $class = shift (@_);
137 my $self = new BasClas($class, @_);
138
139 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
140 my $option_list = $self->{'option_list'};
141 push( @{$option_list}, $options );
142
143
144 my ($metaname, $title, $removeprefix);
145 my $mingroup = 2;
146 my $minnesting = 20;
147 my $mincompact = 10;
148 my $maxcompact = 30;
149 my $doclevel = "top";
150 my $onlyfirst = 0;
151 my $freqsort = 0;
152 my $recopt = undef;
153
154 if (!parsargv::parse(\@_,
155 q^metadata/.*/^, \$metaname,
156 q^buttonname/.*/^, \$title,
157 q^removeprefix/.*/^, \$removeprefix,
158 q^mingroup/.*/2^, \$mingroup,
159 q^minnesting/.*/20^, \$minnesting,
160 q^mincompact/.*/10^, \$mincompact,
161 q^maxcompact/.*/30^, \$maxcompact,
162 q^doclevel/.*/top^, \$doclevel,
163 q^onlyfirst/.*/0^, \$onlyfirst,
164 q^freqsort/.*/0^, \$freqsort,
165 q^recopt/.*/-1^, \$recopt,
166
167 "allow_extra_options")) {
168
169 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
170 &print_usage();
171 die "\n";
172 }
173
174 if (!defined $metaname) {
175 my $outhandle = $self->{'outhandle'};
176 print $outhandle "AZCompactList used with no metadata name to classify by\n";
177 die "\n";
178 }
179
180 $title = $metaname unless ($title);
181
182 $self->{'list'} = {};
183 $self->{'listmetavalue'} = {};
184 $self->{'reclassify'} = {};
185 $self->{'reclassifylist'} = {};
186 $self->{'metaname'} = $metaname;
187 $self->{'title'} = "$title"; # title for the titlebar.
188 if (defined($removeprefix) && $removeprefix) {
189 $self->{'removeprefix'} = $removeprefix;
190 }
191 $self->{'mingroup'} = $mingroup;
192 $self->{'minnesting'} = $minnesting;
193 $self->{'mincompact'} = $mincompact;
194 $self->{'maxcompact'} = $maxcompact;
195 $self->{'doclevel'} = $doclevel;
196
197 if ($onlyfirst != 0) {
198 $onlyfirst = 1;
199 }
200 $self->{'onlyfirst'} = $onlyfirst;
201
202 if ($freqsort != 0) {
203 $freqsort = 1;
204 }
205 $self->{'freqsort'} = $freqsort;
206
207 if ($recopt == -1) {
208 $recopt = undef;
209 } else {
210 $recopt = "on";
211 }
212 $self->{'recopt'} = $recopt;
213
214 return bless $self, $class;
215}
216
217sub init
218{
219 my $self = shift (@_);
220
221 $self->{'list'} = {};
222 $self->{'listmetavalue'} = {};
223 $self->{'reclassify'} = {};
224 $self->{'reclassifylist'} = {};
225}
226
227$tmp = 0;
228
229sub classify
230{
231 my $self = shift (@_);
232 my ($doc_obj) = @_;
233
234 my $doc_OID = $doc_obj->get_OID();
235
236 my @sectionlist = ();
237 my $topsection = $doc_obj->get_top_section();
238
239 my $metaname = $self->{'metaname'};
240 my $outhandle = $self->{'outhandle'};
241
242 $metaname =~ s/(\/.*)//; # grab first name in n1/n2/n3 list
243
244 if ($self->{'doclevel'} =~ /^top(level)?/i)
245 {
246 push(@sectionlist,$topsection);
247 }
248 else
249 {
250 my $thissection = $doc_obj->get_next_section($topsection);
251 while (defined $thissection)
252 {
253 push(@sectionlist,$thissection);
254 $thissection = $doc_obj->get_next_section ($thissection);
255 }
256 }
257
258 my $thissection;
259 foreach $thissection (@sectionlist)
260 {
261 my $full_doc_OID
262 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
263
264 if (defined $self->{'list'}->{$full_doc_OID})
265 {
266 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
267 }
268 $self->{'list'}->{$full_doc_OID} = [];
269 $self->{'listmetavalue'}->{$full_doc_OID} = [];
270
271 my $metavalues = $doc_obj->get_metadata($thissection,$metaname);
272 my $metavalue;
273 foreach $metavalue (@$metavalues)
274 {
275 # if this document doesn't contain the metadata element we're
276 # sorting by we won't include it in this classification
277 if (defined $metavalue && $metavalue =~ /\w/)
278 {
279 if ($self->{'removeprefix'}) {
280 $metavalue =~ s/^$self->{'removeprefix'}//;
281
282 # check that it's not now empty
283 if (!$metavalue) {next;}
284 }
285
286 my $formatted_metavalue = $metavalue;
287
288 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
289 {
290 &sorttools::format_string_name_english (\$formatted_metavalue);
291 }
292 else
293 {
294 &sorttools::format_string_english (\$formatted_metavalue);
295 }
296
297 #### prefix-str
298 if (! defined($formatted_metavalue)) {
299 print $outhandle "Warning: AZCompactList: metavalue is ";
300 print $outhandle "empty\n";
301 $formatted_metavalue="";
302 }
303
304 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
305 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
306
307 last if ($self->{'onlyfirst'});
308 }
309 }
310 my $date = $doc_obj->get_metadata_element($thissection,"Date");
311 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$date];
312 }
313}
314
315sub reinit
316{
317 my ($self,$classlist_ref) = @_;
318 my $outhandle = $self->{'outhandle'};
319
320 my %mtfreq = ();
321 my @single_classlist = ();
322 my @multiple_classlist = ();
323
324 # find out how often each metavalue occurs
325 map
326 {
327 my $mv;
328 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
329 {
330 $mtfreq{$mv}++;
331 }
332 } @$classlist_ref;
333
334 # use this information to split the list: single metavalue/repeated value
335 map
336 {
337 my $i = 1;
338 my $metavalue;
339 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
340 {
341 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
342 {
343 push(@multiple_classlist,[$_,$i,$metavalue]);
344 }
345 else
346 {
347 push(@single_classlist,[$_,$metavalue]);
348 $metavalue =~ tr/[A-Z]/[a-z]/;
349 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
350 }
351 $i++;
352 }
353 } @$classlist_ref;
354
355
356 # Setup sub-classifiers for multiple list
357
358 $self->{'classifiers'} = {};
359
360 my $pm;
361 foreach $pm ("List", "SectionList")
362 {
363 my $listname
364 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
365 if (-e $listname) { require $listname; }
366 else
367 {
368 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
369 die "\n";
370 }
371 }
372
373 # Create classifiers objects for each entry >= mingroup
374 my $metavalue;
375 foreach $metavalue (keys %mtfreq)
376 {
377 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
378 {
379 my $listclassobj;
380 my $doclevel = $self->{'doclevel'};
381 my $metaname = $self->{'metaname'};
382 my @metaname_list = split('/',$metaname);
383 $metaname = shift(@metaname_list);
384 if (@metaname_list==0)
385 {
386 my @args;
387 push @args, ("-metadata", "$metaname");
388# buttonname is also used for the node's title
389 push @args, ("-buttonname", "$metavalue");
390 push @args, ("-sort", "Date");
391
392 if ($doclevel =~ m/^top(level)?/i)
393 {
394 eval ("\$listclassobj = new List(\@args)"); warn $@ if $@;
395 }
396 else
397 {
398 eval ("\$listclassobj = new SectionList(\@args)");
399 }
400 }
401 else
402 {
403 $metaname = join('/',@metaname_list);
404
405 my @args;
406 push @args, ("-metadata", "$metaname");
407# buttonname is also used for the node's title
408 push @args, ("-buttonname", "$metavalue");
409 push @args, ("-doclevel", "$doclevel");
410 push @args, "-recopt";
411
412 eval ("\$listclassobj = new AZCompactList(\@args)");
413 }
414 if ($@) {
415 print $outhandle "$@";
416 die "\n";
417 }
418
419 $listclassobj->init();
420
421 if (defined $metavalue && $metavalue =~ /\w/)
422 {
423 my $formatted_node = $metavalue;
424
425 if ($self->{'removeprefix'}) {
426 $formatted_node =~ s/^$self->{'removeprefix'}//;
427 # check that it's not now empty
428 if (!$formatted_node) {next;}
429 }
430
431 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
432 {
433 &sorttools::format_string_name_english(\$formatted_node);
434 }
435 else
436 {
437 &sorttools::format_string_english(\$formatted_node);
438 }
439
440 # In case our formatted string is empty...
441 if (! defined($formatted_node)) {
442 print $outhandle "Warning: AZCompactList: metavalue is ";
443 print $outhandle "empty\n";
444 $formatted_node="";
445 }
446
447 $self->{'classifiers'}->{$metavalue}
448 = { 'classifyobj' => $listclassobj,
449 'formattednode' => $formatted_node };
450 }
451 }
452 }
453
454
455 return (\@single_classlist,\@multiple_classlist);
456}
457
458
459sub reclassify
460{
461 my ($self,$multiple_cl_ref) = @_;
462
463 # Entries in the current classify list that are "book nodes"
464 # should be recursively classified.
465 #--
466 foreach $dm_pair (@$multiple_cl_ref)
467 {
468 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
469 my $listclassobj;
470
471 # find metavalue in list of sub-classifiers
472 my $found = 0;
473 my $node_name;
474 foreach $node_name (keys %{$self->{'classifiers'}})
475 {
476 $resafe_node_name = $node_name;
477 # escape chars that mean something to perl...
478 $resafe_node_name =~ s/([\\\(\)\[\]\{\}\^\$\.\+\*\?\|])/\\$1/g;
479 if ($metavalue =~ m/^$resafe_node_name$/i)
480 {
481 my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
482
483 ## date appears to not be used in classifier call ####
484
485 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
486 {
487 my $section=$1;
488 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
489 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
490 ->classify($doc_obj, "Section=$section");
491 } else { # section level
492 # Thanks to Don Gourley for this...
493 # classify can't handle multi-level section
494 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
495 ->classify_section($section, $doc_obj, $date);
496 }
497 }
498 else
499 {
500 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
501 ->classify($doc_obj);
502 }
503
504 $found = 1;
505 last;
506 }
507 }
508
509 if (!$found)
510 {
511 my $outhandle=$self->{outhandle};
512 print $outhandle "Warning: AZCompactList::reclassify ";
513 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
514 }
515 }
516}
517
518
519
520sub get_reclassify_info
521{
522 my $self = shift (@_);
523
524 my $node_name;
525 foreach $node_name (keys %{$self->{'classifiers'}})
526 {
527 my $classifyinfo
528 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
529 ->get_classify_info();
530 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
531 = $classifyinfo;
532 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
533 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
534 }
535}
536
537
538sub alpha_numeric_cmp
539{
540 my ($self,$a,$b) = @_;
541
542 my $title_a = $self->{'reclassifylist'}->{$a};
543 my $title_b = $self->{'reclassifylist'}->{$b};
544
545 if ($title_a =~ m/^(\d+(\.\d+)?)/)
546 {
547 my $val_a = $1;
548 if ($title_b =~ m/^(\d+(\.\d+)?)/)
549 {
550 my $val_b = $1;
551 if ($val_a != $val_b)
552 {
553 return ($val_a <=> $val_b);
554 }
555 }
556 }
557
558 return ($title_a cmp $title_b);
559}
560
561sub frequency_cmp
562{
563 my ($self,$a,$b) = @_;
564
565
566 my $title_a = $self->{'reclassifylist'}->{$a};
567 my $title_b = $self->{'reclassifylist'}->{$b};
568
569 my $a_freq = 1;
570 my $b_freq = 1;
571
572 if ($a =~ m/^CLASSIFY\.(.*)$/)
573 {
574 my $a_node = $1;
575 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
576 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
577 }
578
579 if ($b =~ m/^CLASSIFY\.(.*)$/)
580 {
581 my $b_node = $1;
582 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
583 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
584 }
585
586 return $b_freq <=> $a_freq;
587}
588
589sub get_classify_info {
590 my $self = shift (@_);
591
592 my @classlist =keys %{$self->{'list'}}; # list all doc oids
593
594 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
595 $self->reclassify($multiple_cl_ref);
596 $self->get_reclassify_info();
597
598 my @reclassified_classlist;
599 if ($self->{'freqsort'})
600 {
601 @reclassified_classlist
602 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
603 # supress sub-grouping by alphabet
604 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
605 }
606 else
607 {
608# @reclassified_classlist
609# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
610
611 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
612
613 @reclassified_classlist
614 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
615
616 }
617
618 return $self->splitlist (\@reclassified_classlist);
619}
620
621sub get_entry {
622 my $self = shift (@_);
623 my ($title, $childtype, $metaname, $thistype) = @_;
624
625 # organise into classification structure
626 my %classifyinfo = ('childtype'=>$childtype,
627 'Title'=>$title,
628 'contains'=>[],
629 'mdtype'=>$metaname);
630
631 $classifyinfo{'thistype'} = $thistype
632 if defined $thistype && $thistype =~ /\w/;
633
634 return \%classifyinfo;
635}
636
637
638
639# splitlist takes an ordered list of classifications (@$classlistref) and
640# splits it up into alphabetical sub-sections.
641sub splitlist {
642 my $self = shift (@_);
643 my ($classlistref) = @_;
644 my $classhash = {};
645
646 # top level
647 my @metanames = split("/",$self->{'metaname'});
648 my $metaname = shift(@metanames);
649
650 my $childtype = "HList";
651 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
652
653 my $classifyinfo;
654 if (!defined($self->{'recopt'}))
655 {
656 my $title = $self->{'title'}; # should always be defined by now....
657 $title = $metaname unless defined $title;
658 $classifyinfo
659 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
660 }
661 else
662 {
663 my $title = $self->{'title'};
664 $classifyinfo
665 = $self->get_entry ($title, $childtype, $metaname, "VList");
666 }
667
668 # don't need to do any splitting if there are less than 'minnesting' classifications
669 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
670 foreach $subOID (@$classlistref) {
671 if ($subOID =~ /^CLASSIFY\.(.*)$/
672 && defined $self->{'classifiers'}->{$1})
673 {
674 push (@{$classifyinfo->{'contains'}},
675 $self->{'classifiers'}->{$1}->{'classifyinfo'});
676 }
677 else
678 {
679 $subOID =~ s/^Metavalue_(\d+)\.//;
680 my $metaname_offset = $1 -1;
681 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
682 push (@{$classifyinfo->{'contains'}}, $oid_rec);
683 }
684 }
685 return $classifyinfo;
686 }
687
688 # first split up the list into separate A-Z and 0-9 classifications
689 foreach $classification (@$classlistref) {
690 my $title = $self->{'reclassifylist'}->{$classification};
691 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
692### $title =~ s/^\s+//g; # remove a leading spaces
693### $title =~ s/^_+//g; # remove a leading underscores
694 $title =~ s/^\W+//g;
695### $title =~ s/^(\'|\`|\"|\:|\()//g; # remove any opening punctutation
696
697# only want first character for classification
698 $title =~ m/^(.)/; $title=$1;
699 $title =~ tr/[a-z]/[A-Z]/;
700
701 if ($title =~ /^[0-9]$/) {$title = '0-9';}
702 elsif ($title !~ /^[A-Z]$/) {
703 my $outhandle = $self->{'outhandle'};
704 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
705 }
706 $classhash->{$title} = [] unless defined $classhash->{$title};
707 push (@{$classhash->{$title}}, $classification);
708 }
709 $classhash = $self->compactlist ($classhash);
710
711 my @tmparr = ();
712 foreach $subsection (sort keys (%$classhash)) {
713 push (@tmparr, $subsection);
714 }
715
716 # if there's a 0-9 section it will have been sorted to the beginning
717 # but we want it at the end
718 if ($tmparr[0] eq '0-9') {
719 shift @tmparr;
720 push (@tmparr, '0-9');
721 }
722
723 foreach $subclass (@tmparr)
724 {
725 my $tempclassify
726 = (scalar(@tmparr)==1)
727 ? ($self->get_entry(" ", "VList", $metaname))
728 : ($self->get_entry($subclass, "VList", $metaname));
729
730
731 foreach $subsubOID (@{$classhash->{$subclass}})
732 {
733 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
734 && defined $self->{'classifiers'}->{$1})
735 {
736 push (@{$tempclassify->{'contains'}},
737 $self->{'classifiers'}->{$1}->{'classifyinfo'});
738 }
739 else
740 {
741 $subsubOID =~ s/^Metavalue_(\d+)\.//;
742 my $metaname_offset = $1 -1;
743 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
744 push (@{$tempclassify->{'contains'}}, $oid_rec);
745 }
746 }
747 push (@{$classifyinfo->{'contains'}}, $tempclassify);
748 }
749
750 return $classifyinfo;
751}
752
753sub compactlist {
754 my $self = shift (@_);
755 my ($classhashref) = @_;
756 my $compactedhash = {};
757 my @currentOIDs = ();
758 my $currentfirstletter = "";
759 my $currentlastletter = "";
760 my $lastkey = "";
761
762 # minimum and maximum documents to be displayed per page.
763 # the actual maximum will be max + (min-1).
764 # the smallest sub-section is a single letter at present
765 # so in this case there may be many times max documents
766 # displayed on a page.
767 my $min = $self->{'mincompact'};
768 my $max = $self->{'maxcompact'};
769
770 foreach $subsection (sort keys %$classhashref) {
771 if ($subsection eq '0-9') {
772 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
773 next;
774 }
775 $currentfirstletter = $subsection if $currentfirstletter eq "";
776 if ((scalar (@currentOIDs) < $min) ||
777 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
778 push (@currentOIDs, @{$classhashref->{$subsection}});
779 $currentlastletter = $subsection;
780 } else {
781
782 if ($currentfirstletter eq $currentlastletter) {
783 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
784 $lastkey = $currentfirstletter;
785 } else {
786 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
787 $lastkey = "$currentfirstletter-$currentlastletter";
788 }
789 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
790 $compactedhash->{$subsection} = $classhashref->{$subsection};
791 @currentOIDs = ();
792 $currentfirstletter = "";
793 } else {
794 @currentOIDs = @{$classhashref->{$subsection}};
795 $currentfirstletter = $subsection;
796 $currentlastletter = $subsection;
797 }
798 }
799 }
800
801 # add final OIDs to last sub-classification if there aren't many otherwise
802 # add final sub-classification
803 if (scalar (@currentOIDs) < $min) {
804 my ($newkey) = $lastkey =~ /^(.)/;
805 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
806 delete $compactedhash->{$lastkey};
807 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
808 } else {
809 if ($currentfirstletter eq $currentlastletter) {
810 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
811 } else {
812 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
813 }
814 }
815
816 return $compactedhash;
817}
818
8191;
820
821
Note: See TracBrowser for help on using the repository browser.