source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 7406

Last change on this file since 7406 was 7406, checked in by davidb, 20 years ago

Two changes made: the first to allow compact nodes to be formed ignoring
case sensitivity; the second allows for the metadata field to be a comma
separated list in the same way AZList allows.

  • Property svn:keywords set to Author Date Id Revision
File size: 22.9 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25# classifier plugin for sorting alphabetically
26# options are:
27
28package AZCompactList;
29
30use BasClas;
31use sorttools;
32
33sub BEGIN {
34 @ISA = ('BasClas');
35}
36
37my $doclevel_list =
38 [ { 'name' => "top",
39 'desc' => "{AZCompactList.doclevel.top}" },
40 { 'name' => "section",
41 'desc' => "{AZCompactList.doclevel.section}" } ];
42
43my $arguments =
44 [ { 'name' => "metadata",
45 'desc' => "{AZCompactList.metadata}",
46 'type' => "metadata",
47 'reqd' => "yes" },
48 { 'name' => "buttonname",
49 'desc' => "{BasClas.buttonname}",
50 'type' => "string",
51 'deft' => "{BasClas.metadata.deft}",
52 'reqd' => "no" },
53 { 'name' => "sort",
54 'desc' => "{AZCompactList.sort}",
55 'type' => "string",
56 'deft' => "Title",
57 'reqd' => "no" },
58 { 'name' => "removeprefix",
59 'desc' => "{AZCompactList.removeprefix}",
60 'type' => "regexp",
61 'deft' => "",
62 'reqd' => "no" },
63 { 'name' => "removesuffix",
64 'desc' => "{AZCompactList.removesuffix}",
65 'type' => "regexp",
66 'deft' => "",
67 'reqd' => "no" },
68 { 'name' => "mingroup",
69 'desc' => "{AZCompactList.mingroup}",
70 'type' => "int",
71 'deft' => "2",
72 'reqd' => "no" },
73 { 'name' => "minnesting",
74 'desc' => "{AZCompactList.minnesting}",
75 'type' => "int",
76 'deft' => "20",
77 'reqd' => "no" },
78 { 'name' => "mincompact",
79 'desc' => "{AZCompactList.mincompact}",
80 'type' => "int",
81 'deft' => "10",
82 'reqd' => "no" },
83 { 'name' => "maxcompact",
84 'desc' => "{AZCompactList.maxcompact}",
85 'type' => "int",
86 'deft' => "30",
87 'reqd' => "no" },
88 { 'name' => "doclevel",
89 'desc' => "{AZCompactList.doclevel}",
90 'type' => "enum",
91 'list' => $doclevel_list,
92 'deft' => "top",
93 'reqd' => "no" },
94 { 'name' => "onlyfirst",
95 'desc' => "{AZCompactList.onlyfirst}",
96 'type' => "flag",
97 'reqd' => "no" },
98 { 'name' => "freqsort",
99 'desc' => "{AZCompactList.freqsort}",
100 'type' => "flag"},
101 { 'name' => "recopt",
102 'desc' => "{AZCompactList.recopt}",
103 'type' => "flag",
104 'reqd' => "no" } ];
105
106my $options =
107{ 'name' => "AZCompactList",
108 'desc' => "{AZCompactList.desc}",
109 'abstract' => "no",
110 'inherits' => "yes",
111 'args' => $arguments };
112
113
114sub new {
115 my $class = shift (@_);
116 my $self = new BasClas($class, @_);
117
118 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
119 my $option_list = $self->{'option_list'};
120 push( @{$option_list}, $options );
121
122 if ($self->{'info_only'}) {
123 # created from classinfo.pl - don't need to parse the arguments
124 return bless $self, $class;
125 }
126
127 my ($metaname, $title, $removeprefix, $removesuffix);
128 my $sortname = "Title";
129 my $mingroup = 2;
130 my $minnesting = 20;
131 my $mincompact = 10;
132 my $maxcompact = 30;
133 my $doclevel = "top";
134 my $onlyfirst = 0;
135 my $freqsort = 0;
136 my $recopt = undef;
137
138 if (!parsargv::parse(\@_,
139 q^metadata/.*/^, \$metaname,
140 q^buttonname/.*/^, \$title,
141 q^sort/.*/^, \$sortname,
142 q^removeprefix/.*/^, \$removeprefix,
143 q^removesuffix/.*/^, \$removesuffix,
144 q^mingroup/.*/2^, \$mingroup,
145 q^minnesting/.*/20^, \$minnesting,
146 q^mincompact/.*/10^, \$mincompact,
147 q^maxcompact/.*/30^, \$maxcompact,
148 q^doclevel/.*/top^, \$doclevel,
149 q^onlyfirst^, \$onlyfirst,
150 q^freqsort/.*/0^, \$freqsort,
151 q^recopt/.*/-1^, \$recopt, # Used in nested metadata such as -metadata Year/Organisation
152
153 "allow_extra_options")) {
154
155 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
156 $self->print_txt_usage(""); # Use default resource bundle
157 die "\n";
158 }
159
160 if (!$metaname) {
161 my $outhandle = $self->{'outhandle'};
162 print $outhandle "AZCompactList Error: required option -metadata not supplied\n";
163 $self->print_txt_usage("");
164 die "AZCompactList Error: required option -metadata not supplied\n";
165 }
166
167 $title = $metaname unless ($title);
168
169 $self->{'list'} = {};
170 $self->{'listmetavalue'} = {};
171 $self->{'list_mvpair'} = {};
172 $self->{'reclassify'} = {};
173 $self->{'reclassifylist'} = {};
174 $self->{'metaname'} = $metaname;
175 $self->{'title'} = "$title"; # title for the titlebar.
176 $self->{'sortname'} = $sortname;
177 if (defined($removeprefix) && $removeprefix) {
178 $removeprefix =~ s/^\^//; # don't need a leading ^
179 $self->{'removeprefix'} = $removeprefix;
180 }
181 if (defined($removesuffix) && $removesuffix) {
182 $removesuffix =~ s/\$$//; # don't need a trailing $
183 $self->{'removesuffix'} = $removesuffix;
184 }
185 $self->{'mingroup'} = $mingroup;
186 $self->{'minnesting'} = $minnesting;
187 $self->{'mincompact'} = $mincompact;
188 $self->{'maxcompact'} = $maxcompact;
189 $self->{'doclevel'} = $doclevel;
190
191 if ($onlyfirst != 0) {
192 $onlyfirst = 1;
193 }
194 $self->{'onlyfirst'} = $onlyfirst;
195
196 if ($freqsort != 0) {
197 $freqsort = 1;
198 }
199 $self->{'freqsort'} = $freqsort;
200
201 if ($recopt == -1) {
202 $recopt = undef;
203 } else {
204 $recopt = "on";
205 }
206 $self->{'recopt'} = $recopt;
207
208 return bless $self, $class;
209}
210
211sub init
212{
213 my $self = shift (@_);
214
215 $self->{'list'} = {};
216 $self->{'listmetavalue'} = {};
217 $self->{'list_mvpair'} = {};
218 $self->{'reclassify'} = {};
219 $self->{'reclassifylist'} = {};
220}
221
222$tmp = 0;
223
224sub classify
225{
226 my $self = shift (@_);
227 my ($doc_obj) = @_;
228
229 my $doc_OID = $doc_obj->get_OID();
230
231 my @sectionlist = ();
232 my $topsection = $doc_obj->get_top_section();
233
234 my $metaname = $self->{'metaname'};
235 my $outhandle = $self->{'outhandle'};
236
237 $metaname =~ s/(\/.*)//; # grab first name in n1/n2/n3 list
238 my @commameta_list = split(/,/, $metaname);
239
240 if ($self->{'doclevel'} =~ /^top(level)?/i)
241 {
242 push(@sectionlist,$topsection);
243 }
244 else
245 {
246 my $thissection = $doc_obj->get_next_section($topsection);
247 while (defined $thissection)
248 {
249 push(@sectionlist,$thissection);
250 $thissection = $doc_obj->get_next_section ($thissection);
251 }
252 }
253
254 my $thissection;
255 foreach $thissection (@sectionlist)
256 {
257 my $full_doc_OID
258 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
259
260 if (defined $self->{'list_mvpair'}->{$full_doc_OID})
261 {
262 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
263 }
264 $self->{'list'}->{$full_doc_OID} = [];
265 $self->{'listmetavalue'}->{$full_doc_OID} = [];
266 $self->{'list_mvpair'}->{$full_doc_OID} = [];
267
268 my $metavalues = [];
269 foreach my $cmn (@commameta_list) {
270 my $cmvalues = $doc_obj->get_metadata($thissection,$cmn);
271 push(@$metavalues,@$cmvalues) if (defined $cmvalues);
272 }
273
274 my $metavalue;
275 foreach $metavalue (@$metavalues)
276 {
277 # Tidy up use of white space in metavalue for classifying
278 $metavalue =~ s/^\s*//s;
279 $metavalue =~ s/\s*$//s;
280 $metavalue =~ s/\n/ /s;
281 $metavalue =~ s/\s{2,}/ /s;
282
283 # if this document doesn't contain the metadata element we're
284 # sorting by we won't include it in this classification
285 if (defined $metavalue && $metavalue =~ /\w/)
286 {
287 if (defined($self->{'removeprefix'}) &&
288 length($self->{'removeprefix'})) {
289 $metavalue =~ s/^$self->{'removeprefix'}//;
290
291 # check that it's not now empty
292 if (!$metavalue) {next;}
293 }
294
295 if (defined($self->{'removesuffix'}) &&
296 length($self->{'removesuffix'})) {
297 $metavalue =~ s/$self->{'removesuffix'}$//;
298
299 # check that it's not now empty
300 if (!$metavalue) {next;}
301 }
302
303 my $formatted_metavalue = $metavalue;
304 $formatted_metavalue = $self->format_metadata_for_sorting($self->{'metaname'}, $formatted_metavalue, $doc_obj);
305
306 #### prefix-str
307 if (! defined($formatted_metavalue)) {
308 print $outhandle "Warning: AZCompactList: metavalue is ";
309 print $outhandle "empty\n";
310 $formatted_metavalue="";
311 }
312
313 my $mv_pair = { 'mv' => $metavalue, 'fmv' => $formatted_metavalue };
314 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
315 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
316 push(@{$self->{'list_mvpair'}->{$full_doc_OID}},$mv_pair);
317
318
319 last if ($self->{'onlyfirst'});
320 }
321 }
322 my $date = $doc_obj->get_metadata_element($thissection,"Date");
323 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$date];
324 }
325}
326
327sub reinit
328{
329 my ($self,$classlist_ref) = @_;
330 my $outhandle = $self->{'outhandle'};
331
332 my %mtfreq = ();
333 my @single_classlist = ();
334 my @multiple_classlist = ();
335
336 # find out how often each metavalue occurs
337 map
338 {
339 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}} )
340 {
341### print STDERR "*** plain mv = $mvp->{'mv'}\n";
342### print STDERR "*** format mv = $mvp->{'fmv'}\n";
343
344 my $metavalue = $mvp->{'mv'};
345 $mtfreq{$metavalue}++;
346 }
347 } @$classlist_ref;
348
349 # use this information to split the list: single metavalue/repeated value
350 map
351 {
352 my $i = 1;
353 my $metavalue;
354 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}})
355 {
356 my $metavalue = $mvp->{'mv'};
357 my $cs_metavalue = $mvp->{'mv'}; # case sensative
358 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
359 {
360### print STDERR "*** pushing on $cs_metavalue\n";
361
362 push(@multiple_classlist,[$_,$i,$metavalue]);
363 }
364 else
365 {
366 push(@single_classlist,[$_,$cs_metavalue]);
367 $metavalue =~ tr/[A-Z]/[a-z]/;
368 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
369 }
370 $i++;
371 }
372 } @$classlist_ref;
373
374
375 # Setup sub-classifiers for multiple list
376
377 $self->{'classifiers'} = {};
378
379 my $pm;
380 foreach $pm ("List", "SectionList")
381 {
382 my $listname
383 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
384 if (-e $listname) { require $listname; }
385 else
386 {
387 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
388 die "\n";
389 }
390 }
391
392 # Create classifiers objects for each entry >= mingroup
393 my $metavalue;
394 foreach $metavalue (keys %mtfreq)
395 {
396 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
397 {
398 my $listclassobj;
399 my $doclevel = $self->{'doclevel'};
400 my $metaname = $self->{'metaname'};
401 my $mingroup = $self->{'mingroup'};
402
403 my @metaname_list = split('/',$metaname);
404 $metaname = shift(@metaname_list);
405 if (@metaname_list==0)
406 {
407 my @args;
408 push @args, ("-metadata", "$metaname");
409 # buttonname is also used for the node's title
410 push @args, ("-buttonname", "$metavalue");
411 push @args, ("-sort", $self->{'sortname'});
412
413 if ($doclevel =~ m/^top(level)?/i)
414 {
415 eval ("\$listclassobj = new List(\@args)"); warn $@ if $@;
416 }
417 else
418 {
419 eval ("\$listclassobj = new SectionList(\@args)");
420 }
421 }
422 else
423 {
424 $metaname = join('/',@metaname_list);
425
426 my @args;
427 push @args, ("-metadata", "$metaname");
428 # buttonname is also used for the node's title
429 push @args, ("-buttonname", "$metavalue");
430 push @args, ("-doclevel", "$doclevel");
431 push @args, ("-mingroup", $mingroup);
432 push @args, "-recopt ";
433
434 eval ("\$listclassobj = new AZCompactList(\@args)");
435 }
436 if ($@) {
437 print $outhandle "$@";
438 die "\n";
439 }
440
441 $listclassobj->init();
442
443 if (defined $metavalue && $metavalue =~ /\w/)
444 {
445 my $formatted_node = $metavalue;
446
447 if (defined($self->{'removeprefix'}) &&
448 length($self->{'removeprefix'})) {
449 $formatted_node =~ s/^$self->{'removeprefix'}//;
450 # check that it's not now empty
451 if (!$formatted_node) {next;}
452 }
453 if (defined($self->{'removesuffix'}) &&
454 length($self->{'removesuffix'})) {
455 $formatted_node =~ s/$self->{'removesuffix'}$//;
456 # check that it's not now empty
457 if (!$formatted_node) {next;}
458 }
459
460 $formatted_node = $self->format_metadata_for_sorting($self->{'metaname'}, $formatted_node);
461
462 # In case our formatted string is empty...
463 if (! defined($formatted_node)) {
464 print $outhandle "Warning: AZCompactList: metavalue is ";
465 print $outhandle "empty\n";
466 $formatted_node="";
467 }
468
469 # use the lower case, for speed of lookup.
470 my $meta_lc=lc($metavalue);
471 $self->{'classifiers'}->{$meta_lc}
472 = { 'classifyobj' => $listclassobj,
473 'formattednode' => $formatted_node };
474 }
475 }
476 }
477
478
479 return (\@single_classlist,\@multiple_classlist);
480}
481
482
483sub reclassify
484{
485 my ($self,$multiple_cl_ref) = @_;
486
487 # Entries in the current classify list that are "book nodes"
488 # should be recursively classified.
489 #--
490 foreach $dm_pair (@$multiple_cl_ref)
491 {
492 my ($doc_OID,$mdoffset,$metavalue,$cs_metavalue) = @$dm_pair;
493 my $listclassobj;
494
495 # find metavalue in list of sub-classifiers
496 # check if we have a key (lower case) for this metadata value
497 my $node_name=lc($metavalue);
498 if (exists $self->{'classifiers'}->{$node_name})
499 {
500 my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
501
502 ## date appears to not be used in classifier call ####
503
504 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
505 {
506 my $section=$1;
507 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
508 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
509 ->classify($doc_obj, "Section=$section");
510 } else { # section level
511 # Thanks to Don Gourley for this...
512 # classify can't handle multi-level section
513 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
514 ->classify_section($section, $doc_obj, $date);
515 }
516 }
517 else
518 {
519 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
520 ->classify($doc_obj);
521 }
522 } else { # this key is not in the hash
523 my $outhandle=$self->{outhandle};
524 print $outhandle "Warning: AZCompactList::reclassify ";
525 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
526 }
527 }
528}
529
530
531
532sub get_reclassify_info
533{
534 my $self = shift (@_);
535
536 my $node_name;
537 foreach $node_name (keys %{$self->{'classifiers'}})
538 {
539 my $classifyinfo
540 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
541 ->get_classify_info();
542 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
543 = $classifyinfo;
544 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
545 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
546 }
547}
548
549
550sub alpha_numeric_cmp
551{
552 my ($self,$a,$b) = @_;
553
554 my $title_a = $self->{'reclassifylist'}->{$a};
555 my $title_b = $self->{'reclassifylist'}->{$b};
556
557 if ($title_a =~ m/^(\d+(\.\d+)?)/)
558 {
559 my $val_a = $1;
560 if ($title_b =~ m/^(\d+(\.\d+)?)/)
561 {
562 my $val_b = $1;
563 if ($val_a != $val_b)
564 {
565 return ($val_a <=> $val_b);
566 }
567 }
568 }
569
570 return ($title_a cmp $title_b);
571}
572
573sub frequency_cmp
574{
575 my ($self,$a,$b) = @_;
576
577
578 my $title_a = $self->{'reclassifylist'}->{$a};
579 my $title_b = $self->{'reclassifylist'}->{$b};
580
581 my $a_freq = 1;
582 my $b_freq = 1;
583
584 if ($a =~ m/^CLASSIFY\.(.*)$/)
585 {
586 my $a_node = $1;
587 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
588 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
589 }
590
591 if ($b =~ m/^CLASSIFY\.(.*)$/)
592 {
593 my $b_node = $1;
594 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
595 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
596 }
597
598 return $b_freq <=> $a_freq;
599}
600
601sub get_classify_info {
602 my $self = shift (@_);
603
604 my @classlist =keys %{$self->{'list_mvpair'}}; # list all doc oids
605
606 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
607 $self->reclassify($multiple_cl_ref);
608 $self->get_reclassify_info();
609
610 my @reclassified_classlist;
611 if ($self->{'freqsort'})
612 {
613 @reclassified_classlist
614 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
615 # supress sub-grouping by alphabet
616 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
617 }
618 else
619 {
620# @reclassified_classlist
621# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
622
623 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
624
625 @reclassified_classlist
626 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
627
628 }
629
630 return $self->splitlist (\@reclassified_classlist);
631}
632
633sub get_entry {
634 my $self = shift (@_);
635 my ($title, $childtype, $metaname, $thistype) = @_;
636
637 # organise into classification structure
638 my %classifyinfo = ('childtype'=>$childtype,
639 'Title'=>$title,
640 'contains'=>[],
641 'mdtype'=>$metaname);
642
643 $classifyinfo{'thistype'} = $thistype
644 if defined $thistype && $thistype =~ /\w/;
645
646 return \%classifyinfo;
647}
648
649
650
651# splitlist takes an ordered list of classifications (@$classlistref) and
652# splits it up into alphabetical sub-sections.
653sub splitlist {
654 my $self = shift (@_);
655 my ($classlistref) = @_;
656 my $classhash = {};
657
658 # top level
659 my @metanames = split("/",$self->{'metaname'});
660 my $metaname = shift(@metanames);
661
662 my $childtype = "HList";
663 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
664
665 my $classifyinfo;
666 if (!defined($self->{'recopt'}))
667 {
668 my $title = $self->{'title'}; # should always be defined by now....
669 $title = $metaname unless defined $title;
670 $classifyinfo
671 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
672 }
673 else
674 {
675 my $title = $self->{'title'};
676 $classifyinfo
677 = $self->get_entry ($title, $childtype, $metaname, "VList");
678 }
679
680 # don't need to do any splitting if there are less than 'minnesting' classifications
681 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
682 foreach $subOID (@$classlistref) {
683 if ($subOID =~ /^CLASSIFY\.(.*)$/
684 && defined $self->{'classifiers'}->{$1})
685 {
686 push (@{$classifyinfo->{'contains'}},
687 $self->{'classifiers'}->{$1}->{'classifyinfo'});
688 }
689 else
690 {
691 $subOID =~ s/^Metavalue_(\d+)\.//;
692 my $metaname_offset = $1 -1;
693 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
694 push (@{$classifyinfo->{'contains'}}, $oid_rec);
695 }
696 }
697 return $classifyinfo;
698 }
699
700 # first split up the list into separate A-Z and 0-9 classifications
701 foreach $classification (@$classlistref) {
702 my $title = $self->{'reclassifylist'}->{$classification};
703 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
704 $title =~ s/^\W+//g; # remove leading non-word chars
705
706 # only want first character for classification
707 $title =~ m/^(.)/;
708 if ($1) {
709 $title=$1;
710 } else {
711 print STDERR "no first character found for \"$title\" - \"" .
712 $self->{'reclassifylist'}->{$classification} . "\"\n";
713 }
714 $title =~ tr/[a-z]/[A-Z]/;
715
716 if ($title =~ /^[0-9]$/) {$title = '0-9';}
717 elsif ($title !~ /^[A-Z]$/) {
718 my $outhandle = $self->{'outhandle'};
719 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
720 }
721 $classhash->{$title} = [] unless defined $classhash->{$title};
722 push (@{$classhash->{$title}}, $classification);
723 }
724 $classhash = $self->compactlist ($classhash);
725
726 my @tmparr = ();
727 foreach $subsection (sort keys (%$classhash)) {
728 push (@tmparr, $subsection);
729 }
730
731 # if there's a 0-9 section it will have been sorted to the beginning
732 # but we want it at the end
733 if ($tmparr[0] eq '0-9') {
734 shift @tmparr;
735 push (@tmparr, '0-9');
736 }
737
738 foreach $subclass (@tmparr)
739 {
740 my $tempclassify
741 = (scalar(@tmparr)==1)
742 ? ($self->get_entry(" ", "VList", $metaname))
743 : ($self->get_entry($subclass, "VList", $metaname));
744
745
746 foreach $subsubOID (@{$classhash->{$subclass}})
747 {
748 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
749 && defined $self->{'classifiers'}->{$1})
750 {
751 push (@{$tempclassify->{'contains'}},
752 $self->{'classifiers'}->{$1}->{'classifyinfo'});
753 }
754 else
755 {
756 $subsubOID =~ s/^Metavalue_(\d+)\.//;
757 my $metaname_offset = $1 -1;
758 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
759 push (@{$tempclassify->{'contains'}}, $oid_rec);
760 }
761 }
762 push (@{$classifyinfo->{'contains'}}, $tempclassify);
763 }
764
765 return $classifyinfo;
766}
767
768sub compactlist {
769 my $self = shift (@_);
770 my ($classhashref) = @_;
771 my $compactedhash = {};
772 my @currentOIDs = ();
773 my $currentfirstletter = "";
774 my $currentlastletter = "";
775 my $lastkey = "";
776
777 # minimum and maximum documents to be displayed per page.
778 # the actual maximum will be max + (min-1).
779 # the smallest sub-section is a single letter at present
780 # so in this case there may be many times max documents
781 # displayed on a page.
782 my $min = $self->{'mincompact'};
783 my $max = $self->{'maxcompact'};
784
785 foreach $subsection (sort keys %$classhashref) {
786 if ($subsection eq '0-9') {
787 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
788 next;
789 }
790 $currentfirstletter = $subsection if $currentfirstletter eq "";
791 if ((scalar (@currentOIDs) < $min) ||
792 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
793 push (@currentOIDs, @{$classhashref->{$subsection}});
794 $currentlastletter = $subsection;
795 } else {
796
797 if ($currentfirstletter eq $currentlastletter) {
798 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
799 $lastkey = $currentfirstletter;
800 } else {
801 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
802 $lastkey = "$currentfirstletter-$currentlastletter";
803 }
804 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
805 $compactedhash->{$subsection} = $classhashref->{$subsection};
806 @currentOIDs = ();
807 $currentfirstletter = "";
808 $lastkey=$subsection;
809 } else {
810 @currentOIDs = @{$classhashref->{$subsection}};
811 $currentfirstletter = $subsection;
812 $currentlastletter = $subsection;
813 }
814 }
815 }
816
817 # add final OIDs to last sub-classification if there aren't many otherwise
818 # add final sub-classification
819
820 # don't add if there aren't any oids
821 if (! scalar (@currentOIDs)) {return $compactedhash;}
822
823 if (scalar (@currentOIDs) < $min) {
824 my ($newkey) = $lastkey =~ /^(.)/;
825 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
826 delete $compactedhash->{$lastkey};
827 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
828 } else {
829 if ($currentfirstletter eq $currentlastletter) {
830 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
831 } else {
832 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
833 }
834 }
835
836 return $compactedhash;
837}
838
8391;
840
841
Note: See TracBrowser for help on using the repository browser.