source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 5729

Last change on this file since 5729 was 5729, checked in by kjdon, 21 years ago

replaced some text with keys in the options list

  • Property svn:keywords set to Author Date Id Revision
File size: 23.4 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28#
29# metadata=Metaname -- all documents with Metaname metadata
30# will be included in list, list will be sorted
31# by this element.
32# buttonname=Title -- (optional) the title field for this classification.
33# if not included title field will be Metaname.
34# mingroup=Num -- (optional) the smallest value that will cause
35# a group in the hierarchy to form.
36# minnesting=Num -- (optional) the smallest value that will cause a
37# list to converted into nested list
38# mincompact=Num -- (optional) used in compact list
39# maxcompact=Num -- (optional) used in compact list
40# doclevel=top|section -- (optional) level to process document at.
41# onlyfirst -- (optional) control whether all or only first
42# metadata value used from array of metadata
43package AZCompactList;
44
45use BasClas;
46use sorttools;
47
48sub BEGIN {
49 @ISA = ('BasClas');
50}
51
52my $doclevel_list =
53 [ { 'name' => "top",
54 'desc' => "{AZCompactList.doclevel.top}" },
55 { 'name' => "section",
56 'desc' => "{AZCompactList.doclevel.section}" } ];
57
58my $arguments =
59 [ { 'name' => "metadata",
60 'desc' => "{AZCompactList.metadata}",
61 'type' => "metadata",
62 'reqd' => "yes" },
63 { 'name' => "buttonname",
64 'desc' => "{AZCompactList.buttonname}",
65 'type' => "string",
66 'deft' => "{AZCompactList.metadata.deft}",
67 'reqd' => "no" },
68 { 'name' => "removeprefix",
69 'desc' => "{AZCompactList.removeprefix}",
70 'type' => "string",
71 'deft' => "",
72 'reqd' => "no" },
73 { 'name' => "removesuffix",
74 'desc' => "{AZCompactList.removesuffix}",
75 'type' => "string",
76 'deft' => "",
77 'reqd' => "no" },
78 { 'name' => "mingroup",
79 'desc' => "{AZCompactList.mingroup}",
80 'type' => "int",
81 'deft' => "2",
82 'reqd' => "no" },
83 { 'name' => "minnesting",
84 'desc' => "{AZCompactList.minnesting}",
85 'type' => "int",
86 'deft' => "20",
87 'reqd' => "no" },
88 { 'name' => "mincompact",
89 'desc' => "{AZCompactList.mincompact}",
90 'type' => "int",
91 'deft' => "10",
92 'reqd' => "no" },
93 { 'name' => "maxcompact",
94 'desc' => "{AZCompactList.maxcompact}",
95 'type' => "int",
96 'deft' => "30",
97 'reqd' => "no" },
98 { 'name' => "doclevel",
99 'desc' => "{AZCompactList.doclevel}",
100 'type' => "enum",
101 'list' => $doclevel_list,
102 'deft' => "top",
103 'reqd' => "no" },
104 { 'name' => "onlyfirst",
105 'desc' => "{AZCompactList.onlyfirst}",
106 'type' => "flag",
107 'reqd' => "no" },
108 { 'name' => "freqsort",
109 'desc' => "{AZCompactList.freqsort}",
110 'type' => "flag"},
111 { 'name' => "recopt",
112 'desc' => "{AZCompactList.recopt}",
113 'type' => "flag",
114 'deft' => "",
115 'reqd' => "no" } ];
116
117my $options =
118{ 'name' => "AZCompactList",
119 'desc' => "{AZCompactList.desc}",
120 'inherits' => "Yes",
121 'args' => $arguments };
122
123# sub print_usage {
124# print STDERR "
125# usage: classify AZCompactList -metadata X [options]
126# options:
127# -metadata X (required) Metadata field used for classification
128# -buttonname X Title to use on web pages (defaults to metadata)
129# -removeprefix regex pattern to remove from metadata before sorting
130# -doclevel top|section (Defaults to top)
131# -freqsort Sort by node frequency rather than alpha-numeric
132# -mingroup N Minimum num of documents required to form a new group
133# -minnesting N Minimum list size to become a nested list
134# -mincompact N Used in compact list
135# -maxcompact N Used in compact list
136# -onlyfirst Only use the first value if metadata is repeated.
137# -recopt
138# ";
139# }
140
141sub new {
142 my $class = shift (@_);
143 my $self = new BasClas($class, @_);
144
145 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
146 my $option_list = $self->{'option_list'};
147 push( @{$option_list}, $options );
148
149
150 my ($metaname, $title, $removeprefix, $removesuffix);
151 my $mingroup = 2;
152 my $minnesting = 20;
153 my $mincompact = 10;
154 my $maxcompact = 30;
155 my $doclevel = "top";
156 my $onlyfirst = 0;
157 my $freqsort = 0;
158 my $recopt = undef;
159
160 if (!parsargv::parse(\@_,
161 q^metadata/.*/^, \$metaname,
162 q^buttonname/.*/^, \$title,
163 q^removeprefix/.*/^, \$removeprefix,
164 q^removesuffix/.*/^, \$removesuffix,
165 q^mingroup/.*/2^, \$mingroup,
166 q^minnesting/.*/20^, \$minnesting,
167 q^mincompact/.*/10^, \$mincompact,
168 q^maxcompact/.*/30^, \$maxcompact,
169 q^doclevel/.*/top^, \$doclevel,
170 q^onlyfirst/.*/0^, \$onlyfirst,
171 q^freqsort/.*/0^, \$freqsort,
172 q^recopt/.*/-1^, \$recopt, # Used in nested metadata such as -metadata Year/Organisation
173
174 "allow_extra_options")) {
175
176 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
177 $self->print_txt_usage(""); # Use default resource bundle
178 die "\n";
179 }
180
181 if (!defined $metaname) {
182 my $outhandle = $self->{'outhandle'};
183 print $outhandle "AZCompactList used with no metadata name to classify by\n";
184 die "\n";
185 }
186
187 $title = $metaname unless ($title);
188
189 $self->{'list'} = {};
190 $self->{'listmetavalue'} = {};
191 $self->{'reclassify'} = {};
192 $self->{'reclassifylist'} = {};
193 $self->{'metaname'} = $metaname;
194 $self->{'title'} = "$title"; # title for the titlebar.
195 if (defined($removeprefix) && $removeprefix) {
196 $removeprefix =~ s/^\^//; # don't need a leading ^
197 $self->{'removeprefix'} = $removeprefix;
198 }
199 if (defined($removesuffix) && $removesuffix) {
200 $removesuffix =~ s/\$$//; # don't need a trailing $
201 $self->{'removesuffix'} = $removesuffix;
202 }
203 $self->{'mingroup'} = $mingroup;
204 $self->{'minnesting'} = $minnesting;
205 $self->{'mincompact'} = $mincompact;
206 $self->{'maxcompact'} = $maxcompact;
207 $self->{'doclevel'} = $doclevel;
208
209 if ($onlyfirst != 0) {
210 $onlyfirst = 1;
211 }
212 $self->{'onlyfirst'} = $onlyfirst;
213
214 if ($freqsort != 0) {
215 $freqsort = 1;
216 }
217 $self->{'freqsort'} = $freqsort;
218
219 if ($recopt == -1) {
220 $recopt = undef;
221 } else {
222 $recopt = "on";
223 }
224 $self->{'recopt'} = $recopt;
225
226 return bless $self, $class;
227}
228
229sub init
230{
231 my $self = shift (@_);
232
233 $self->{'list'} = {};
234 $self->{'listmetavalue'} = {};
235 $self->{'reclassify'} = {};
236 $self->{'reclassifylist'} = {};
237}
238
239$tmp = 0;
240
241sub classify
242{
243 my $self = shift (@_);
244 my ($doc_obj) = @_;
245
246 my $doc_OID = $doc_obj->get_OID();
247
248 my @sectionlist = ();
249 my $topsection = $doc_obj->get_top_section();
250
251 my $metaname = $self->{'metaname'};
252 my $outhandle = $self->{'outhandle'};
253
254 $metaname =~ s/(\/.*)//; # grab first name in n1/n2/n3 list
255
256 if ($self->{'doclevel'} =~ /^top(level)?/i)
257 {
258 push(@sectionlist,$topsection);
259 }
260 else
261 {
262 my $thissection = $doc_obj->get_next_section($topsection);
263 while (defined $thissection)
264 {
265 push(@sectionlist,$thissection);
266 $thissection = $doc_obj->get_next_section ($thissection);
267 }
268 }
269
270 my $thissection;
271 foreach $thissection (@sectionlist)
272 {
273 my $full_doc_OID
274 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
275
276 if (defined $self->{'list'}->{$full_doc_OID})
277 {
278 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
279 }
280 $self->{'list'}->{$full_doc_OID} = [];
281 $self->{'listmetavalue'}->{$full_doc_OID} = [];
282
283 my $metavalues = $doc_obj->get_metadata($thissection,$metaname);
284 my $metavalue;
285 foreach $metavalue (@$metavalues)
286 {
287 # if this document doesn't contain the metadata element we're
288 # sorting by we won't include it in this classification
289 if (defined $metavalue && $metavalue =~ /\w/)
290 {
291 if (defined($self->{'removeprefix'}) &&
292 length($self->{'removeprefix'})) {
293 $metavalue =~ s/^$self->{'removeprefix'}//;
294
295 # check that it's not now empty
296 if (!$metavalue) {next;}
297 }
298
299 if (defined($self->{'removesuffix'}) &&
300 length($self->{'removesuffix'})) {
301 $metavalue =~ s/$self->{'removesuffix'}$//;
302
303 # check that it's not now empty
304 if (!$metavalue) {next;}
305 }
306
307 my $formatted_metavalue = $metavalue;
308
309 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
310 {
311 &sorttools::format_string_name_english (\$formatted_metavalue);
312 }
313 else
314 {
315 &sorttools::format_string_english (\$formatted_metavalue);
316 }
317
318 #### prefix-str
319 if (! defined($formatted_metavalue)) {
320 print $outhandle "Warning: AZCompactList: metavalue is ";
321 print $outhandle "empty\n";
322 $formatted_metavalue="";
323 }
324
325 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
326 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
327
328 last if ($self->{'onlyfirst'});
329 }
330 }
331 my $date = $doc_obj->get_metadata_element($thissection,"Date");
332 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$date];
333 }
334}
335
336sub reinit
337{
338 my ($self,$classlist_ref) = @_;
339 my $outhandle = $self->{'outhandle'};
340
341 my %mtfreq = ();
342 my @single_classlist = ();
343 my @multiple_classlist = ();
344
345 # find out how often each metavalue occurs
346 map
347 {
348 my $mv;
349 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
350 {
351 $mtfreq{$mv}++;
352 }
353 } @$classlist_ref;
354
355 # use this information to split the list: single metavalue/repeated value
356 map
357 {
358 my $i = 1;
359 my $metavalue;
360 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
361 {
362 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
363 {
364 push(@multiple_classlist,[$_,$i,$metavalue]);
365 }
366 else
367 {
368 push(@single_classlist,[$_,$metavalue]);
369 $metavalue =~ tr/[A-Z]/[a-z]/;
370 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
371 }
372 $i++;
373 }
374 } @$classlist_ref;
375
376
377 # Setup sub-classifiers for multiple list
378
379 $self->{'classifiers'} = {};
380
381 my $pm;
382 foreach $pm ("List", "SectionList")
383 {
384 my $listname
385 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
386 if (-e $listname) { require $listname; }
387 else
388 {
389 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
390 die "\n";
391 }
392 }
393
394 # Create classifiers objects for each entry >= mingroup
395 my $metavalue;
396 foreach $metavalue (keys %mtfreq)
397 {
398 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
399 {
400 my $listclassobj;
401 my $doclevel = $self->{'doclevel'};
402 my $metaname = $self->{'metaname'};
403 my $mingroup = $self->{'mingroup'};
404
405 my @metaname_list = split('/',$metaname);
406 $metaname = shift(@metaname_list);
407 if (@metaname_list==0)
408 {
409 my @args;
410 push @args, ("-metadata", "$metaname");
411 # buttonname is also used for the node's title
412 push @args, ("-buttonname", "$metavalue");
413 push @args, ("-sort", "Date");
414
415 if ($doclevel =~ m/^top(level)?/i)
416 {
417 eval ("\$listclassobj = new List(\@args)"); warn $@ if $@;
418 }
419 else
420 {
421 eval ("\$listclassobj = new SectionList(\@args)");
422 }
423 }
424 else
425 {
426 $metaname = join('/',@metaname_list);
427
428 my @args;
429 push @args, ("-metadata", "$metaname");
430 # buttonname is also used for the node's title
431 push @args, ("-buttonname", "$metavalue");
432 push @args, ("-doclevel", "$doclevel");
433 push @args, ("-mingroup", $mingroup);
434 push @args, "-recopt ";
435
436 eval ("\$listclassobj = new AZCompactList(\@args)");
437 }
438 if ($@) {
439 print $outhandle "$@";
440 die "\n";
441 }
442
443 $listclassobj->init();
444
445 if (defined $metavalue && $metavalue =~ /\w/)
446 {
447 my $formatted_node = $metavalue;
448
449 if (defined($self->{'removeprefix'}) &&
450 length($self->{'removeprefix'})) {
451 $formatted_node =~ s/^$self->{'removeprefix'}//;
452 # check that it's not now empty
453 if (!$formatted_node) {next;}
454 }
455 if (defined($self->{'removesuffix'}) &&
456 length($self->{'removesuffix'})) {
457 $formatted_node =~ s/$self->{'removesuffix'}$//;
458 # check that it's not now empty
459 if (!$formatted_node) {next;}
460 }
461
462 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
463 {
464 &sorttools::format_string_name_english(\$formatted_node);
465 }
466 else
467 {
468 &sorttools::format_string_english(\$formatted_node);
469 }
470
471 # In case our formatted string is empty...
472 if (! defined($formatted_node)) {
473 print $outhandle "Warning: AZCompactList: metavalue is ";
474 print $outhandle "empty\n";
475 $formatted_node="";
476 }
477
478 $self->{'classifiers'}->{$metavalue}
479 = { 'classifyobj' => $listclassobj,
480 'formattednode' => $formatted_node };
481 }
482 }
483 }
484
485
486 return (\@single_classlist,\@multiple_classlist);
487}
488
489
490sub reclassify
491{
492 my ($self,$multiple_cl_ref) = @_;
493
494 # Entries in the current classify list that are "book nodes"
495 # should be recursively classified.
496 #--
497 foreach $dm_pair (@$multiple_cl_ref)
498 {
499 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
500 my $listclassobj;
501
502 # find metavalue in list of sub-classifiers
503 my $found = 0;
504 my $node_name;
505 foreach $node_name (keys %{$self->{'classifiers'}})
506 {
507 $resafe_node_name = $node_name;
508 # escape chars that mean something to perl...
509 $resafe_node_name =~ s/([\\\(\)\[\]\{\}\^\$\.\+\*\?\|])/\\$1/g;
510 if ($metavalue =~ m/^$resafe_node_name$/i)
511 {
512 my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
513
514 ## date appears to not be used in classifier call ####
515
516 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
517 {
518 my $section=$1;
519 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
520 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
521 ->classify($doc_obj, "Section=$section");
522 } else { # section level
523 # Thanks to Don Gourley for this...
524 # classify can't handle multi-level section
525 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
526 ->classify_section($section, $doc_obj, $date);
527 }
528 }
529 else
530 {
531 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
532 ->classify($doc_obj);
533 }
534
535 $found = 1;
536 last;
537 }
538 }
539
540 if (!$found)
541 {
542 my $outhandle=$self->{outhandle};
543 print $outhandle "Warning: AZCompactList::reclassify ";
544 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
545 }
546 }
547}
548
549
550
551sub get_reclassify_info
552{
553 my $self = shift (@_);
554
555 my $node_name;
556 foreach $node_name (keys %{$self->{'classifiers'}})
557 {
558 my $classifyinfo
559 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
560 ->get_classify_info();
561 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
562 = $classifyinfo;
563 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
564 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
565 }
566}
567
568
569sub alpha_numeric_cmp
570{
571 my ($self,$a,$b) = @_;
572
573 my $title_a = $self->{'reclassifylist'}->{$a};
574 my $title_b = $self->{'reclassifylist'}->{$b};
575
576 if ($title_a =~ m/^(\d+(\.\d+)?)/)
577 {
578 my $val_a = $1;
579 if ($title_b =~ m/^(\d+(\.\d+)?)/)
580 {
581 my $val_b = $1;
582 if ($val_a != $val_b)
583 {
584 return ($val_a <=> $val_b);
585 }
586 }
587 }
588
589 return ($title_a cmp $title_b);
590}
591
592sub frequency_cmp
593{
594 my ($self,$a,$b) = @_;
595
596
597 my $title_a = $self->{'reclassifylist'}->{$a};
598 my $title_b = $self->{'reclassifylist'}->{$b};
599
600 my $a_freq = 1;
601 my $b_freq = 1;
602
603 if ($a =~ m/^CLASSIFY\.(.*)$/)
604 {
605 my $a_node = $1;
606 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
607 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
608 }
609
610 if ($b =~ m/^CLASSIFY\.(.*)$/)
611 {
612 my $b_node = $1;
613 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
614 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
615 }
616
617 return $b_freq <=> $a_freq;
618}
619
620sub get_classify_info {
621 my $self = shift (@_);
622
623 my @classlist =keys %{$self->{'list'}}; # list all doc oids
624
625 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
626 $self->reclassify($multiple_cl_ref);
627 $self->get_reclassify_info();
628
629 my @reclassified_classlist;
630 if ($self->{'freqsort'})
631 {
632 @reclassified_classlist
633 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
634 # supress sub-grouping by alphabet
635 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
636 }
637 else
638 {
639# @reclassified_classlist
640# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
641
642 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
643
644 @reclassified_classlist
645 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
646
647 }
648
649 return $self->splitlist (\@reclassified_classlist);
650}
651
652sub get_entry {
653 my $self = shift (@_);
654 my ($title, $childtype, $metaname, $thistype) = @_;
655
656 # organise into classification structure
657 my %classifyinfo = ('childtype'=>$childtype,
658 'Title'=>$title,
659 'contains'=>[],
660 'mdtype'=>$metaname);
661
662 $classifyinfo{'thistype'} = $thistype
663 if defined $thistype && $thistype =~ /\w/;
664
665 return \%classifyinfo;
666}
667
668
669
670# splitlist takes an ordered list of classifications (@$classlistref) and
671# splits it up into alphabetical sub-sections.
672sub splitlist {
673 my $self = shift (@_);
674 my ($classlistref) = @_;
675 my $classhash = {};
676
677 # top level
678 my @metanames = split("/",$self->{'metaname'});
679 my $metaname = shift(@metanames);
680
681 my $childtype = "HList";
682 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
683
684 my $classifyinfo;
685 if (!defined($self->{'recopt'}))
686 {
687 my $title = $self->{'title'}; # should always be defined by now....
688 $title = $metaname unless defined $title;
689 $classifyinfo
690 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
691 }
692 else
693 {
694 my $title = $self->{'title'};
695 $classifyinfo
696 = $self->get_entry ($title, $childtype, $metaname, "VList");
697 }
698
699 # don't need to do any splitting if there are less than 'minnesting' classifications
700 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
701 foreach $subOID (@$classlistref) {
702 if ($subOID =~ /^CLASSIFY\.(.*)$/
703 && defined $self->{'classifiers'}->{$1})
704 {
705 push (@{$classifyinfo->{'contains'}},
706 $self->{'classifiers'}->{$1}->{'classifyinfo'});
707 }
708 else
709 {
710 $subOID =~ s/^Metavalue_(\d+)\.//;
711 my $metaname_offset = $1 -1;
712 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
713 push (@{$classifyinfo->{'contains'}}, $oid_rec);
714 }
715 }
716 return $classifyinfo;
717 }
718
719 # first split up the list into separate A-Z and 0-9 classifications
720 foreach $classification (@$classlistref) {
721 my $title = $self->{'reclassifylist'}->{$classification};
722 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
723### $title =~ s/^\s+//g; # remove a leading spaces
724### $title =~ s/^_+//g; # remove a leading underscores
725 $title =~ s/^\W+//g;
726### $title =~ s/^(\'|\`|\"|\:|\()//g; # remove any opening punctutation
727
728# only want first character for classification
729 $title =~ m/^(.)/; $title=$1;
730 $title =~ tr/[a-z]/[A-Z]/;
731
732 if ($title =~ /^[0-9]$/) {$title = '0-9';}
733 elsif ($title !~ /^[A-Z]$/) {
734 my $outhandle = $self->{'outhandle'};
735 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
736 }
737 $classhash->{$title} = [] unless defined $classhash->{$title};
738 push (@{$classhash->{$title}}, $classification);
739 }
740 $classhash = $self->compactlist ($classhash);
741
742 my @tmparr = ();
743 foreach $subsection (sort keys (%$classhash)) {
744 push (@tmparr, $subsection);
745 }
746
747 # if there's a 0-9 section it will have been sorted to the beginning
748 # but we want it at the end
749 if ($tmparr[0] eq '0-9') {
750 shift @tmparr;
751 push (@tmparr, '0-9');
752 }
753
754 foreach $subclass (@tmparr)
755 {
756 my $tempclassify
757 = (scalar(@tmparr)==1)
758 ? ($self->get_entry(" ", "VList", $metaname))
759 : ($self->get_entry($subclass, "VList", $metaname));
760
761
762 foreach $subsubOID (@{$classhash->{$subclass}})
763 {
764 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
765 && defined $self->{'classifiers'}->{$1})
766 {
767 push (@{$tempclassify->{'contains'}},
768 $self->{'classifiers'}->{$1}->{'classifyinfo'});
769 }
770 else
771 {
772 $subsubOID =~ s/^Metavalue_(\d+)\.//;
773 my $metaname_offset = $1 -1;
774 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
775 push (@{$tempclassify->{'contains'}}, $oid_rec);
776 }
777 }
778 push (@{$classifyinfo->{'contains'}}, $tempclassify);
779 }
780
781 return $classifyinfo;
782}
783
784sub compactlist {
785 my $self = shift (@_);
786 my ($classhashref) = @_;
787 my $compactedhash = {};
788 my @currentOIDs = ();
789 my $currentfirstletter = "";
790 my $currentlastletter = "";
791 my $lastkey = "";
792
793 # minimum and maximum documents to be displayed per page.
794 # the actual maximum will be max + (min-1).
795 # the smallest sub-section is a single letter at present
796 # so in this case there may be many times max documents
797 # displayed on a page.
798 my $min = $self->{'mincompact'};
799 my $max = $self->{'maxcompact'};
800
801 foreach $subsection (sort keys %$classhashref) {
802 if ($subsection eq '0-9') {
803 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
804 next;
805 }
806 $currentfirstletter = $subsection if $currentfirstletter eq "";
807 if ((scalar (@currentOIDs) < $min) ||
808 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
809 push (@currentOIDs, @{$classhashref->{$subsection}});
810 $currentlastletter = $subsection;
811 } else {
812
813 if ($currentfirstletter eq $currentlastletter) {
814 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
815 $lastkey = $currentfirstletter;
816 } else {
817 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
818 $lastkey = "$currentfirstletter-$currentlastletter";
819 }
820 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
821 $compactedhash->{$subsection} = $classhashref->{$subsection};
822 @currentOIDs = ();
823 $currentfirstletter = "";
824 } else {
825 @currentOIDs = @{$classhashref->{$subsection}};
826 $currentfirstletter = $subsection;
827 $currentlastletter = $subsection;
828 }
829 }
830 }
831
832 # add final OIDs to last sub-classification if there aren't many otherwise
833 # add final sub-classification
834 if (scalar (@currentOIDs) < $min) {
835 my ($newkey) = $lastkey =~ /^(.)/;
836 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
837 delete $compactedhash->{$lastkey};
838 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
839 } else {
840 if ($currentfirstletter eq $currentlastletter) {
841 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
842 } else {
843 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
844 }
845 }
846
847 return $compactedhash;
848}
849
8501;
851
852
Note: See TracBrowser for help on using the repository browser.