source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 6084

Last change on this file since 6084 was 6084, checked in by jrm21, 20 years ago

finally tracked down and fixed problem with last letter being out of
order in some circumstances.

  • Property svn:keywords set to Author Date Id Revision
File size: 23.5 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28#
29# metadata=Metaname -- all documents with Metaname metadata
30# will be included in list, list will be sorted
31# by this element.
32# buttonname=Title -- (optional) the title field for this classification.
33# if not included title field will be Metaname.
34# mingroup=Num -- (optional) the smallest value that will cause
35# a group in the hierarchy to form.
36# minnesting=Num -- (optional) the smallest value that will cause a
37# list to converted into nested list
38# mincompact=Num -- (optional) used in compact list
39# maxcompact=Num -- (optional) used in compact list
40# doclevel=top|section -- (optional) level to process document at.
41# onlyfirst -- (optional) control whether all or only first
42# metadata value used from array of metadata
43package AZCompactList;
44
45use BasClas;
46use sorttools;
47
48sub BEGIN {
49 @ISA = ('BasClas');
50}
51
52my $doclevel_list =
53 [ { 'name' => "top",
54 'desc' => "{AZCompactList.doclevel.top}" },
55 { 'name' => "section",
56 'desc' => "{AZCompactList.doclevel.section}" } ];
57
58my $arguments =
59 [ { 'name' => "metadata",
60 'desc' => "{AZCompactList.metadata}",
61 'type' => "metadata",
62 'reqd' => "yes" },
63 { 'name' => "buttonname",
64 'desc' => "{AZCompactList.buttonname}",
65 'type' => "string",
66 'deft' => "{AZCompactList.metadata.deft}",
67 'reqd' => "no" },
68 { 'name' => "removeprefix",
69 'desc' => "{AZCompactList.removeprefix}",
70 'type' => "string",
71 'deft' => "",
72 'reqd' => "no" },
73 { 'name' => "removesuffix",
74 'desc' => "{AZCompactList.removesuffix}",
75 'type' => "string",
76 'deft' => "",
77 'reqd' => "no" },
78 { 'name' => "mingroup",
79 'desc' => "{AZCompactList.mingroup}",
80 'type' => "int",
81 'deft' => "2",
82 'reqd' => "no" },
83 { 'name' => "minnesting",
84 'desc' => "{AZCompactList.minnesting}",
85 'type' => "int",
86 'deft' => "20",
87 'reqd' => "no" },
88 { 'name' => "mincompact",
89 'desc' => "{AZCompactList.mincompact}",
90 'type' => "int",
91 'deft' => "10",
92 'reqd' => "no" },
93 { 'name' => "maxcompact",
94 'desc' => "{AZCompactList.maxcompact}",
95 'type' => "int",
96 'deft' => "30",
97 'reqd' => "no" },
98 { 'name' => "doclevel",
99 'desc' => "{AZCompactList.doclevel}",
100 'type' => "enum",
101 'list' => $doclevel_list,
102 'deft' => "top",
103 'reqd' => "no" },
104 { 'name' => "onlyfirst",
105 'desc' => "{AZCompactList.onlyfirst}",
106 'type' => "flag",
107 'reqd' => "no" },
108 { 'name' => "freqsort",
109 'desc' => "{AZCompactList.freqsort}",
110 'type' => "flag"},
111 { 'name' => "recopt",
112 'desc' => "{AZCompactList.recopt}",
113 'type' => "flag",
114 'deft' => "",
115 'reqd' => "no" } ];
116
117my $options =
118{ 'name' => "AZCompactList",
119 'desc' => "{AZCompactList.desc}",
120 'inherits' => "Yes",
121 'args' => $arguments };
122
123# sub print_usage {
124# print STDERR "
125# usage: classify AZCompactList -metadata X [options]
126# options:
127# -metadata X (required) Metadata field used for classification
128# -buttonname X Title to use on web pages (defaults to metadata)
129# -removeprefix regex pattern to remove from metadata before sorting
130# -doclevel top|section (Defaults to top)
131# -freqsort Sort by node frequency rather than alpha-numeric
132# -mingroup N Minimum num of documents required to form a new group
133# -minnesting N Minimum list size to become a nested list
134# -mincompact N Used in compact list
135# -maxcompact N Used in compact list
136# -onlyfirst Only use the first value if metadata is repeated.
137# -recopt
138# ";
139# }
140
141sub new {
142 my $class = shift (@_);
143 my $self = new BasClas($class, @_);
144
145 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
146 my $option_list = $self->{'option_list'};
147 push( @{$option_list}, $options );
148
149
150 my ($metaname, $title, $removeprefix, $removesuffix);
151 my $mingroup = 2;
152 my $minnesting = 20;
153 my $mincompact = 10;
154 my $maxcompact = 30;
155 my $doclevel = "top";
156 my $onlyfirst = 0;
157 my $freqsort = 0;
158 my $recopt = undef;
159
160 if (!parsargv::parse(\@_,
161 q^metadata/.*/^, \$metaname,
162 q^buttonname/.*/^, \$title,
163 q^removeprefix/.*/^, \$removeprefix,
164 q^removesuffix/.*/^, \$removesuffix,
165 q^mingroup/.*/2^, \$mingroup,
166 q^minnesting/.*/20^, \$minnesting,
167 q^mincompact/.*/10^, \$mincompact,
168 q^maxcompact/.*/30^, \$maxcompact,
169 q^doclevel/.*/top^, \$doclevel,
170 q^onlyfirst/.*/0^, \$onlyfirst,
171 q^freqsort/.*/0^, \$freqsort,
172 q^recopt/.*/-1^, \$recopt, # Used in nested metadata such as -metadata Year/Organisation
173
174 "allow_extra_options")) {
175
176 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
177 $self->print_txt_usage(""); # Use default resource bundle
178 die "\n";
179 }
180
181 if (!defined $metaname) {
182 my $outhandle = $self->{'outhandle'};
183 print $outhandle "AZCompactList used with no metadata name to classify by\n";
184 die "\n";
185 }
186
187 $title = $metaname unless ($title);
188
189 $self->{'list'} = {};
190 $self->{'listmetavalue'} = {};
191 $self->{'reclassify'} = {};
192 $self->{'reclassifylist'} = {};
193 $self->{'metaname'} = $metaname;
194 $self->{'title'} = "$title"; # title for the titlebar.
195 if (defined($removeprefix) && $removeprefix) {
196 $removeprefix =~ s/^\^//; # don't need a leading ^
197 $self->{'removeprefix'} = $removeprefix;
198 }
199 if (defined($removesuffix) && $removesuffix) {
200 $removesuffix =~ s/\$$//; # don't need a trailing $
201 $self->{'removesuffix'} = $removesuffix;
202 }
203 $self->{'mingroup'} = $mingroup;
204 $self->{'minnesting'} = $minnesting;
205 $self->{'mincompact'} = $mincompact;
206 $self->{'maxcompact'} = $maxcompact;
207 $self->{'doclevel'} = $doclevel;
208
209 if ($onlyfirst != 0) {
210 $onlyfirst = 1;
211 }
212 $self->{'onlyfirst'} = $onlyfirst;
213
214 if ($freqsort != 0) {
215 $freqsort = 1;
216 }
217 $self->{'freqsort'} = $freqsort;
218
219 if ($recopt == -1) {
220 $recopt = undef;
221 } else {
222 $recopt = "on";
223 }
224 $self->{'recopt'} = $recopt;
225
226 return bless $self, $class;
227}
228
229sub init
230{
231 my $self = shift (@_);
232
233 $self->{'list'} = {};
234 $self->{'listmetavalue'} = {};
235 $self->{'reclassify'} = {};
236 $self->{'reclassifylist'} = {};
237}
238
239$tmp = 0;
240
241sub classify
242{
243 my $self = shift (@_);
244 my ($doc_obj) = @_;
245
246 my $doc_OID = $doc_obj->get_OID();
247
248 my @sectionlist = ();
249 my $topsection = $doc_obj->get_top_section();
250
251 my $metaname = $self->{'metaname'};
252 my $outhandle = $self->{'outhandle'};
253
254 $metaname =~ s/(\/.*)//; # grab first name in n1/n2/n3 list
255
256 if ($self->{'doclevel'} =~ /^top(level)?/i)
257 {
258 push(@sectionlist,$topsection);
259 }
260 else
261 {
262 my $thissection = $doc_obj->get_next_section($topsection);
263 while (defined $thissection)
264 {
265 push(@sectionlist,$thissection);
266 $thissection = $doc_obj->get_next_section ($thissection);
267 }
268 }
269
270 my $thissection;
271 foreach $thissection (@sectionlist)
272 {
273 my $full_doc_OID
274 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
275
276 if (defined $self->{'list'}->{$full_doc_OID})
277 {
278 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
279 }
280 $self->{'list'}->{$full_doc_OID} = [];
281 $self->{'listmetavalue'}->{$full_doc_OID} = [];
282
283 my $metavalues = $doc_obj->get_metadata($thissection,$metaname);
284 my $metavalue;
285 foreach $metavalue (@$metavalues)
286 {
287 # if this document doesn't contain the metadata element we're
288 # sorting by we won't include it in this classification
289 if (defined $metavalue && $metavalue =~ /\w/)
290 {
291 if (defined($self->{'removeprefix'}) &&
292 length($self->{'removeprefix'})) {
293 $metavalue =~ s/^$self->{'removeprefix'}//;
294
295 # check that it's not now empty
296 if (!$metavalue) {next;}
297 }
298
299 if (defined($self->{'removesuffix'}) &&
300 length($self->{'removesuffix'})) {
301 $metavalue =~ s/$self->{'removesuffix'}$//;
302
303 # check that it's not now empty
304 if (!$metavalue) {next;}
305 }
306
307 my $formatted_metavalue = $metavalue;
308
309 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
310 {
311 &sorttools::format_string_name_english (\$formatted_metavalue);
312 }
313 else
314 {
315 &sorttools::format_string_english (\$formatted_metavalue);
316 }
317
318 #### prefix-str
319 if (! defined($formatted_metavalue)) {
320 print $outhandle "Warning: AZCompactList: metavalue is ";
321 print $outhandle "empty\n";
322 $formatted_metavalue="";
323 }
324
325 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
326 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
327
328 last if ($self->{'onlyfirst'});
329 }
330 }
331 my $date = $doc_obj->get_metadata_element($thissection,"Date");
332 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$date];
333 }
334}
335
336sub reinit
337{
338 my ($self,$classlist_ref) = @_;
339 my $outhandle = $self->{'outhandle'};
340
341 my %mtfreq = ();
342 my @single_classlist = ();
343 my @multiple_classlist = ();
344
345 # find out how often each metavalue occurs
346 map
347 {
348 my $mv;
349 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
350 {
351 $mtfreq{$mv}++;
352 }
353 } @$classlist_ref;
354
355 # use this information to split the list: single metavalue/repeated value
356 map
357 {
358 my $i = 1;
359 my $metavalue;
360 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
361 {
362 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
363 {
364 push(@multiple_classlist,[$_,$i,$metavalue]);
365 }
366 else
367 {
368 push(@single_classlist,[$_,$metavalue]);
369 $metavalue =~ tr/[A-Z]/[a-z]/;
370 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
371 }
372 $i++;
373 }
374 } @$classlist_ref;
375
376
377 # Setup sub-classifiers for multiple list
378
379 $self->{'classifiers'} = {};
380
381 my $pm;
382 foreach $pm ("List", "SectionList")
383 {
384 my $listname
385 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
386 if (-e $listname) { require $listname; }
387 else
388 {
389 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
390 die "\n";
391 }
392 }
393
394 # Create classifiers objects for each entry >= mingroup
395 my $metavalue;
396 foreach $metavalue (keys %mtfreq)
397 {
398 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
399 {
400 my $listclassobj;
401 my $doclevel = $self->{'doclevel'};
402 my $metaname = $self->{'metaname'};
403 my $mingroup = $self->{'mingroup'};
404
405 my @metaname_list = split('/',$metaname);
406 $metaname = shift(@metaname_list);
407 if (@metaname_list==0)
408 {
409 my @args;
410 push @args, ("-metadata", "$metaname");
411 # buttonname is also used for the node's title
412 push @args, ("-buttonname", "$metavalue");
413 push @args, ("-sort", "Date");
414
415 if ($doclevel =~ m/^top(level)?/i)
416 {
417 eval ("\$listclassobj = new List(\@args)"); warn $@ if $@;
418 }
419 else
420 {
421 eval ("\$listclassobj = new SectionList(\@args)");
422 }
423 }
424 else
425 {
426 $metaname = join('/',@metaname_list);
427
428 my @args;
429 push @args, ("-metadata", "$metaname");
430 # buttonname is also used for the node's title
431 push @args, ("-buttonname", "$metavalue");
432 push @args, ("-doclevel", "$doclevel");
433 push @args, ("-mingroup", $mingroup);
434 push @args, "-recopt ";
435
436 eval ("\$listclassobj = new AZCompactList(\@args)");
437 }
438 if ($@) {
439 print $outhandle "$@";
440 die "\n";
441 }
442
443 $listclassobj->init();
444
445 if (defined $metavalue && $metavalue =~ /\w/)
446 {
447 my $formatted_node = $metavalue;
448
449 if (defined($self->{'removeprefix'}) &&
450 length($self->{'removeprefix'})) {
451 $formatted_node =~ s/^$self->{'removeprefix'}//;
452 # check that it's not now empty
453 if (!$formatted_node) {next;}
454 }
455 if (defined($self->{'removesuffix'}) &&
456 length($self->{'removesuffix'})) {
457 $formatted_node =~ s/$self->{'removesuffix'}$//;
458 # check that it's not now empty
459 if (!$formatted_node) {next;}
460 }
461
462 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
463 {
464 &sorttools::format_string_name_english(\$formatted_node);
465 }
466 else
467 {
468 &sorttools::format_string_english(\$formatted_node);
469 }
470
471 # In case our formatted string is empty...
472 if (! defined($formatted_node)) {
473 print $outhandle "Warning: AZCompactList: metavalue is ";
474 print $outhandle "empty\n";
475 $formatted_node="";
476 }
477
478 $self->{'classifiers'}->{$metavalue}
479 = { 'classifyobj' => $listclassobj,
480 'formattednode' => $formatted_node };
481 }
482 }
483 }
484
485
486 return (\@single_classlist,\@multiple_classlist);
487}
488
489
490sub reclassify
491{
492 my ($self,$multiple_cl_ref) = @_;
493
494 # Entries in the current classify list that are "book nodes"
495 # should be recursively classified.
496 #--
497 foreach $dm_pair (@$multiple_cl_ref)
498 {
499 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
500 my $listclassobj;
501
502 # find metavalue in list of sub-classifiers
503 my $found = 0;
504 my $node_name;
505 foreach $node_name (keys %{$self->{'classifiers'}})
506 {
507 $resafe_node_name = $node_name;
508 # escape chars that mean something to perl...
509 $resafe_node_name =~ s/([\\\(\)\[\]\{\}\^\$\.\+\*\?\|])/\\$1/g;
510 if ($metavalue =~ m/^$resafe_node_name$/i)
511 {
512 my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
513
514 ## date appears to not be used in classifier call ####
515
516 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
517 {
518 my $section=$1;
519 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
520 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
521 ->classify($doc_obj, "Section=$section");
522 } else { # section level
523 # Thanks to Don Gourley for this...
524 # classify can't handle multi-level section
525 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
526 ->classify_section($section, $doc_obj, $date);
527 }
528 }
529 else
530 {
531 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
532 ->classify($doc_obj);
533 }
534
535 $found = 1;
536 last;
537 }
538 }
539
540 if (!$found)
541 {
542 my $outhandle=$self->{outhandle};
543 print $outhandle "Warning: AZCompactList::reclassify ";
544 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
545 }
546 }
547}
548
549
550
551sub get_reclassify_info
552{
553 my $self = shift (@_);
554
555 my $node_name;
556 foreach $node_name (keys %{$self->{'classifiers'}})
557 {
558 my $classifyinfo
559 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
560 ->get_classify_info();
561 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
562 = $classifyinfo;
563 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
564 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
565 }
566}
567
568
569sub alpha_numeric_cmp
570{
571 my ($self,$a,$b) = @_;
572
573 my $title_a = $self->{'reclassifylist'}->{$a};
574 my $title_b = $self->{'reclassifylist'}->{$b};
575
576 if ($title_a =~ m/^(\d+(\.\d+)?)/)
577 {
578 my $val_a = $1;
579 if ($title_b =~ m/^(\d+(\.\d+)?)/)
580 {
581 my $val_b = $1;
582 if ($val_a != $val_b)
583 {
584 return ($val_a <=> $val_b);
585 }
586 }
587 }
588
589 return ($title_a cmp $title_b);
590}
591
592sub frequency_cmp
593{
594 my ($self,$a,$b) = @_;
595
596
597 my $title_a = $self->{'reclassifylist'}->{$a};
598 my $title_b = $self->{'reclassifylist'}->{$b};
599
600 my $a_freq = 1;
601 my $b_freq = 1;
602
603 if ($a =~ m/^CLASSIFY\.(.*)$/)
604 {
605 my $a_node = $1;
606 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
607 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
608 }
609
610 if ($b =~ m/^CLASSIFY\.(.*)$/)
611 {
612 my $b_node = $1;
613 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
614 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
615 }
616
617 return $b_freq <=> $a_freq;
618}
619
620sub get_classify_info {
621 my $self = shift (@_);
622
623 my @classlist =keys %{$self->{'list'}}; # list all doc oids
624
625 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
626 $self->reclassify($multiple_cl_ref);
627 $self->get_reclassify_info();
628
629 my @reclassified_classlist;
630 if ($self->{'freqsort'})
631 {
632 @reclassified_classlist
633 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
634 # supress sub-grouping by alphabet
635 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
636 }
637 else
638 {
639# @reclassified_classlist
640# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
641
642 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
643
644 @reclassified_classlist
645 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
646
647 }
648
649 return $self->splitlist (\@reclassified_classlist);
650}
651
652sub get_entry {
653 my $self = shift (@_);
654 my ($title, $childtype, $metaname, $thistype) = @_;
655
656 # organise into classification structure
657 my %classifyinfo = ('childtype'=>$childtype,
658 'Title'=>$title,
659 'contains'=>[],
660 'mdtype'=>$metaname);
661
662 $classifyinfo{'thistype'} = $thistype
663 if defined $thistype && $thistype =~ /\w/;
664
665 return \%classifyinfo;
666}
667
668
669
670# splitlist takes an ordered list of classifications (@$classlistref) and
671# splits it up into alphabetical sub-sections.
672sub splitlist {
673 my $self = shift (@_);
674 my ($classlistref) = @_;
675 my $classhash = {};
676
677 # top level
678 my @metanames = split("/",$self->{'metaname'});
679 my $metaname = shift(@metanames);
680
681 my $childtype = "HList";
682 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
683
684 my $classifyinfo;
685 if (!defined($self->{'recopt'}))
686 {
687 my $title = $self->{'title'}; # should always be defined by now....
688 $title = $metaname unless defined $title;
689 $classifyinfo
690 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
691 }
692 else
693 {
694 my $title = $self->{'title'};
695 $classifyinfo
696 = $self->get_entry ($title, $childtype, $metaname, "VList");
697 }
698
699 # don't need to do any splitting if there are less than 'minnesting' classifications
700 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
701 foreach $subOID (@$classlistref) {
702 if ($subOID =~ /^CLASSIFY\.(.*)$/
703 && defined $self->{'classifiers'}->{$1})
704 {
705 push (@{$classifyinfo->{'contains'}},
706 $self->{'classifiers'}->{$1}->{'classifyinfo'});
707 }
708 else
709 {
710 $subOID =~ s/^Metavalue_(\d+)\.//;
711 my $metaname_offset = $1 -1;
712 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
713 push (@{$classifyinfo->{'contains'}}, $oid_rec);
714 }
715 }
716 return $classifyinfo;
717 }
718
719 # first split up the list into separate A-Z and 0-9 classifications
720 foreach $classification (@$classlistref) {
721 my $title = $self->{'reclassifylist'}->{$classification};
722 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
723### $title =~ s/^\s+//g; # remove a leading spaces
724### $title =~ s/^_+//g; # remove a leading underscores
725 $title =~ s/^\W+//g;
726### $title =~ s/^(\'|\`|\"|\:|\()//g; # remove any opening punctutation
727
728# only want first character for classification
729 $title =~ m/^(.)/; $title=$1;
730 $title =~ tr/[a-z]/[A-Z]/;
731
732 if ($title =~ /^[0-9]$/) {$title = '0-9';}
733 elsif ($title !~ /^[A-Z]$/) {
734 my $outhandle = $self->{'outhandle'};
735 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
736 }
737 $classhash->{$title} = [] unless defined $classhash->{$title};
738 push (@{$classhash->{$title}}, $classification);
739 }
740 $classhash = $self->compactlist ($classhash);
741
742 my @tmparr = ();
743 foreach $subsection (sort keys (%$classhash)) {
744 push (@tmparr, $subsection);
745 }
746
747 # if there's a 0-9 section it will have been sorted to the beginning
748 # but we want it at the end
749 if ($tmparr[0] eq '0-9') {
750 shift @tmparr;
751 push (@tmparr, '0-9');
752 }
753
754 foreach $subclass (@tmparr)
755 {
756 my $tempclassify
757 = (scalar(@tmparr)==1)
758 ? ($self->get_entry(" ", "VList", $metaname))
759 : ($self->get_entry($subclass, "VList", $metaname));
760
761
762 foreach $subsubOID (@{$classhash->{$subclass}})
763 {
764 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
765 && defined $self->{'classifiers'}->{$1})
766 {
767 push (@{$tempclassify->{'contains'}},
768 $self->{'classifiers'}->{$1}->{'classifyinfo'});
769 }
770 else
771 {
772 $subsubOID =~ s/^Metavalue_(\d+)\.//;
773 my $metaname_offset = $1 -1;
774 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
775 push (@{$tempclassify->{'contains'}}, $oid_rec);
776 }
777 }
778 push (@{$classifyinfo->{'contains'}}, $tempclassify);
779 }
780
781 return $classifyinfo;
782}
783
784sub compactlist {
785 my $self = shift (@_);
786 my ($classhashref) = @_;
787 my $compactedhash = {};
788 my @currentOIDs = ();
789 my $currentfirstletter = "";
790 my $currentlastletter = "";
791 my $lastkey = "";
792
793 # minimum and maximum documents to be displayed per page.
794 # the actual maximum will be max + (min-1).
795 # the smallest sub-section is a single letter at present
796 # so in this case there may be many times max documents
797 # displayed on a page.
798 my $min = $self->{'mincompact'};
799 my $max = $self->{'maxcompact'};
800
801 foreach $subsection (sort keys %$classhashref) {
802 if ($subsection eq '0-9') {
803 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
804 next;
805 }
806 $currentfirstletter = $subsection if $currentfirstletter eq "";
807 if ((scalar (@currentOIDs) < $min) ||
808 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
809 push (@currentOIDs, @{$classhashref->{$subsection}});
810 $currentlastletter = $subsection;
811 } else {
812
813 if ($currentfirstletter eq $currentlastletter) {
814 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
815 $lastkey = $currentfirstletter;
816 } else {
817 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
818 $lastkey = "$currentfirstletter-$currentlastletter";
819 }
820 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
821 $compactedhash->{$subsection} = $classhashref->{$subsection};
822 @currentOIDs = ();
823 $currentfirstletter = "";
824 $lastkey=$subsection;
825 } else {
826 @currentOIDs = @{$classhashref->{$subsection}};
827 $currentfirstletter = $subsection;
828 $currentlastletter = $subsection;
829 }
830 }
831 }
832
833 # add final OIDs to last sub-classification if there aren't many otherwise
834 # add final sub-classification
835
836 # don't add if there aren't any oids
837 if (! scalar (@currentOIDs)) {return $compactedhash;}
838
839 if (scalar (@currentOIDs) < $min) {
840 my ($newkey) = $lastkey =~ /^(.)/;
841 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
842 delete $compactedhash->{$lastkey};
843 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
844 } else {
845 if ($currentfirstletter eq $currentlastletter) {
846 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
847 } else {
848 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
849 }
850 }
851
852 return $compactedhash;
853}
854
8551;
856
857
Note: See TracBrowser for help on using the repository browser.