source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 4840

Last change on this file since 4840 was 4840, checked in by davidb, 21 years ago

-removesuffix option added to provide similar funtionality to -removeprefix

  • Property svn:keywords set to Author Date Id Revision
File size: 23.5 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28#
29# metadata=Metaname -- all documents with Metaname metadata
30# will be included in list, list will be sorted
31# by this element.
32# buttonname=Title -- (optional) the title field for this classification.
33# if not included title field will be Metaname.
34# mingroup=Num -- (optional) the smallest value that will cause
35# a group in the hierarchy to form.
36# minnesting=Num -- (optional) the smallest value that will cause a
37# list to converted into nested list
38# mincompact=Num -- (optional) used in compact list
39# maxcompact=Num -- (optional) used in compact list
40# doclevel=top|section -- (optional) level to process document at.
41# onlyfirst -- (optional) control whether all or only first
42# metadata value used from array of metadata
43package AZCompactList;
44
45use BasClas;
46use sorttools;
47
48sub BEGIN {
49 @ISA = ('BasClas');
50}
51
52my $doclevel_list =
53 [ { 'name' => "top",
54 'desc' => "Whole document." } ,
55 { 'name' => "section",
56 'desc' => "By sections." } ];
57
58my $arguments =
59 [ { 'name' => "metadata",
60 'desc' => "Metadata field used for classification. List will be sorted by this element.",
61 'type' => "metadata",
62 'reqd' => "yes" },
63 { 'name' => "buttonname",
64 'desc' => "Button name for this classification.",
65 'type' => "string",
66 'deft' => "Metadata element specified with -metadata",
67 'reqd' => "no" },
68 { 'name' => "removeprefix",
69 'desc' => "A prefix to ignore in the Metadata values for the field when sorting.",
70 'type' => "string",
71 'deft' => "",
72 'reqd' => "no" },
73 { 'name' => "removesuffix",
74 'desc' => "A suffix to ignore in the Metadata values for the field when sorting.",
75 'type' => "string",
76 'deft' => "",
77 'reqd' => "no" },
78 { 'name' => "mingroup",
79 'desc' => "The smallest value that will cause a group in the hierarchy to form.",
80 'type' => "int",
81 'deft' => "2",
82 'reqd' => "no" },
83 { 'name' => "minnesting",
84 'desc' => "The smallest value that will cause a list to converted into nested list.",
85 'type' => "int",
86 'deft' => "20",
87 'reqd' => "no" },
88 { 'name' => "mincompact",
89 'desc' => "Minimum number of documents to be displayed per page.",
90 'type' => "int",
91 'deft' => "10",
92 'reqd' => "no" },
93 { 'name' => "maxcompact",
94 'desc' => "Maximum number of documents to be displayed per page.",
95 'type' => "int",
96 'deft' => "30",
97 'reqd' => "no" },
98 { 'name' => "doclevel",
99 'desc' => "Level to process document at.",
100 'type' => "enum",
101 'list' => $doclevel_list,
102 'deft' => "top",
103 'reqd' => "no" },
104 { 'name' => "onlyfirst",
105 'desc' => "Control whether all or only first metadata value used from array of metadata.",
106 'type' => "flag",
107 'reqd' => "no" },
108 { 'name' => "freqsort",
109 'desc' => "Sort by node frequency rather than alpha-numeric.",
110 'type' => "flag"},
111 { 'name' => "recopt",
112 'desc' => "?",
113 'type' => "string",
114 'deft' => "" } ];
115
116my $options =
117{ 'name' => "AZCompactList",
118 'desc' => "Classifier plugin for sorting alphabetically",
119 'inherits' => "Yes",
120 'args' => $arguments };
121
122# sub print_usage {
123# print STDERR "
124# usage: classify AZCompactList -metadata X [options]
125# options:
126# -metadata X (required) Metadata field used for classification
127# -buttonname X Title to use on web pages (defaults to metadata)
128# -removeprefix regex pattern to remove from metadata before sorting
129# -doclevel top|section (Defaults to top)
130# -freqsort Sort by node frequency rather than alpha-numeric
131# -mingroup N Minimum num of documents required to form a new group
132# -minnesting N Minimum list size to become a nested list
133# -mincompact N Used in compact list
134# -maxcompact N Used in compact list
135# -onlyfirst Only use the first value if metadata is repeated.
136# -recopt
137# ";
138# }
139
140sub new {
141 my $class = shift (@_);
142 my $self = new BasClas($class, @_);
143
144 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
145 my $option_list = $self->{'option_list'};
146 push( @{$option_list}, $options );
147
148
149 my ($metaname, $title, $removeprefix, $removesuffix);
150 my $mingroup = 2;
151 my $minnesting = 20;
152 my $mincompact = 10;
153 my $maxcompact = 30;
154 my $doclevel = "top";
155 my $onlyfirst = 0;
156 my $freqsort = 0;
157 my $recopt = undef;
158
159 if (!parsargv::parse(\@_,
160 q^metadata/.*/^, \$metaname,
161 q^buttonname/.*/^, \$title,
162 q^removeprefix/.*/^, \$removeprefix,
163 q^removesuffix/.*/^, \$removesuffix,
164 q^mingroup/.*/2^, \$mingroup,
165 q^minnesting/.*/20^, \$minnesting,
166 q^mincompact/.*/10^, \$mincompact,
167 q^maxcompact/.*/30^, \$maxcompact,
168 q^doclevel/.*/top^, \$doclevel,
169 q^onlyfirst/.*/0^, \$onlyfirst,
170 q^freqsort/.*/0^, \$freqsort,
171 q^recopt/.*/-1^, \$recopt,
172
173 "allow_extra_options")) {
174
175 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
176 $self->print_txt_usage();
177 die "\n";
178 }
179
180 if (!defined $metaname) {
181 my $outhandle = $self->{'outhandle'};
182 print $outhandle "AZCompactList used with no metadata name to classify by\n";
183 die "\n";
184 }
185
186 $title = $metaname unless ($title);
187
188 $self->{'list'} = {};
189 $self->{'listmetavalue'} = {};
190 $self->{'reclassify'} = {};
191 $self->{'reclassifylist'} = {};
192 $self->{'metaname'} = $metaname;
193 $self->{'title'} = "$title"; # title for the titlebar.
194 if (defined($removeprefix) && $removeprefix) {
195 $removeprefix =~ s/^\^//; # don't need a leading ^
196 $self->{'removeprefix'} = $removeprefix;
197 }
198 if (defined($removesuffix) && $removesuffix) {
199 $removesuffix =~ s/\$$//; # don't need a trailing $
200 $self->{'removesuffix'} = $removesuffix;
201 }
202 $self->{'mingroup'} = $mingroup;
203 $self->{'minnesting'} = $minnesting;
204 $self->{'mincompact'} = $mincompact;
205 $self->{'maxcompact'} = $maxcompact;
206 $self->{'doclevel'} = $doclevel;
207
208 if ($onlyfirst != 0) {
209 $onlyfirst = 1;
210 }
211 $self->{'onlyfirst'} = $onlyfirst;
212
213 if ($freqsort != 0) {
214 $freqsort = 1;
215 }
216 $self->{'freqsort'} = $freqsort;
217
218 if ($recopt == -1) {
219 $recopt = undef;
220 } else {
221 $recopt = "on";
222 }
223 $self->{'recopt'} = $recopt;
224
225 return bless $self, $class;
226}
227
228sub init
229{
230 my $self = shift (@_);
231
232 $self->{'list'} = {};
233 $self->{'listmetavalue'} = {};
234 $self->{'reclassify'} = {};
235 $self->{'reclassifylist'} = {};
236}
237
238$tmp = 0;
239
240sub classify
241{
242 my $self = shift (@_);
243 my ($doc_obj) = @_;
244
245 my $doc_OID = $doc_obj->get_OID();
246
247 my @sectionlist = ();
248 my $topsection = $doc_obj->get_top_section();
249
250 my $metaname = $self->{'metaname'};
251 my $outhandle = $self->{'outhandle'};
252
253 $metaname =~ s/(\/.*)//; # grab first name in n1/n2/n3 list
254
255 if ($self->{'doclevel'} =~ /^top(level)?/i)
256 {
257 push(@sectionlist,$topsection);
258 }
259 else
260 {
261 my $thissection = $doc_obj->get_next_section($topsection);
262 while (defined $thissection)
263 {
264 push(@sectionlist,$thissection);
265 $thissection = $doc_obj->get_next_section ($thissection);
266 }
267 }
268
269 my $thissection;
270 foreach $thissection (@sectionlist)
271 {
272 my $full_doc_OID
273 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
274
275 if (defined $self->{'list'}->{$full_doc_OID})
276 {
277 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
278 }
279 $self->{'list'}->{$full_doc_OID} = [];
280 $self->{'listmetavalue'}->{$full_doc_OID} = [];
281
282 my $metavalues = $doc_obj->get_metadata($thissection,$metaname);
283 my $metavalue;
284 foreach $metavalue (@$metavalues)
285 {
286 # if this document doesn't contain the metadata element we're
287 # sorting by we won't include it in this classification
288 if (defined $metavalue && $metavalue =~ /\w/)
289 {
290 if (defined($self->{'removeprefix'}) &&
291 length($self->{'removeprefix'})) {
292 $metavalue =~ s/^$self->{'removeprefix'}//;
293
294 # check that it's not now empty
295 if (!$metavalue) {next;}
296 }
297
298 if (defined($self->{'removesuffix'}) &&
299 length($self->{'removesuffix'})) {
300 $metavalue =~ s/$self->{'removesuffix'}$//;
301
302 # check that it's not now empty
303 if (!$metavalue) {next;}
304 }
305
306 my $formatted_metavalue = $metavalue;
307
308 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
309 {
310 &sorttools::format_string_name_english (\$formatted_metavalue);
311 }
312 else
313 {
314 &sorttools::format_string_english (\$formatted_metavalue);
315 }
316
317 #### prefix-str
318 if (! defined($formatted_metavalue)) {
319 print $outhandle "Warning: AZCompactList: metavalue is ";
320 print $outhandle "empty\n";
321 $formatted_metavalue="";
322 }
323
324 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
325 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
326
327 last if ($self->{'onlyfirst'});
328 }
329 }
330 my $date = $doc_obj->get_metadata_element($thissection,"Date");
331 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$date];
332 }
333}
334
335sub reinit
336{
337 my ($self,$classlist_ref) = @_;
338 my $outhandle = $self->{'outhandle'};
339
340 my %mtfreq = ();
341 my @single_classlist = ();
342 my @multiple_classlist = ();
343
344 # find out how often each metavalue occurs
345 map
346 {
347 my $mv;
348 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
349 {
350 $mtfreq{$mv}++;
351 }
352 } @$classlist_ref;
353
354 # use this information to split the list: single metavalue/repeated value
355 map
356 {
357 my $i = 1;
358 my $metavalue;
359 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
360 {
361 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
362 {
363 push(@multiple_classlist,[$_,$i,$metavalue]);
364 }
365 else
366 {
367 push(@single_classlist,[$_,$metavalue]);
368 $metavalue =~ tr/[A-Z]/[a-z]/;
369 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
370 }
371 $i++;
372 }
373 } @$classlist_ref;
374
375
376 # Setup sub-classifiers for multiple list
377
378 $self->{'classifiers'} = {};
379
380 my $pm;
381 foreach $pm ("List", "SectionList")
382 {
383 my $listname
384 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
385 if (-e $listname) { require $listname; }
386 else
387 {
388 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
389 die "\n";
390 }
391 }
392
393 # Create classifiers objects for each entry >= mingroup
394 my $metavalue;
395 foreach $metavalue (keys %mtfreq)
396 {
397 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
398 {
399 my $listclassobj;
400 my $doclevel = $self->{'doclevel'};
401 my $metaname = $self->{'metaname'};
402 my @metaname_list = split('/',$metaname);
403 $metaname = shift(@metaname_list);
404 if (@metaname_list==0)
405 {
406 my @args;
407 push @args, ("-metadata", "$metaname");
408# buttonname is also used for the node's title
409 push @args, ("-buttonname", "$metavalue");
410 push @args, ("-sort", "Date");
411
412 if ($doclevel =~ m/^top(level)?/i)
413 {
414 eval ("\$listclassobj = new List(\@args)"); warn $@ if $@;
415 }
416 else
417 {
418 eval ("\$listclassobj = new SectionList(\@args)");
419 }
420 }
421 else
422 {
423 $metaname = join('/',@metaname_list);
424
425 my @args;
426 push @args, ("-metadata", "$metaname");
427# buttonname is also used for the node's title
428 push @args, ("-buttonname", "$metavalue");
429 push @args, ("-doclevel", "$doclevel");
430 push @args, "-recopt";
431
432 eval ("\$listclassobj = new AZCompactList(\@args)");
433 }
434 if ($@) {
435 print $outhandle "$@";
436 die "\n";
437 }
438
439 $listclassobj->init();
440
441 if (defined $metavalue && $metavalue =~ /\w/)
442 {
443 my $formatted_node = $metavalue;
444
445 if (defined($self->{'removeprefix'}) &&
446 length($self->{'removeprefix'})) {
447 $formatted_node =~ s/^$self->{'removeprefix'}//;
448 # check that it's not now empty
449 if (!$formatted_node) {next;}
450 }
451 if (defined($self->{'removesuffix'}) &&
452 length($self->{'removesuffix'})) {
453 $formatted_node =~ s/$self->{'removesuffix'}$//;
454 # check that it's not now empty
455 if (!$formatted_node) {next;}
456 }
457
458 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
459 {
460 &sorttools::format_string_name_english(\$formatted_node);
461 }
462 else
463 {
464 &sorttools::format_string_english(\$formatted_node);
465 }
466
467 # In case our formatted string is empty...
468 if (! defined($formatted_node)) {
469 print $outhandle "Warning: AZCompactList: metavalue is ";
470 print $outhandle "empty\n";
471 $formatted_node="";
472 }
473
474 $self->{'classifiers'}->{$metavalue}
475 = { 'classifyobj' => $listclassobj,
476 'formattednode' => $formatted_node };
477 }
478 }
479 }
480
481
482 return (\@single_classlist,\@multiple_classlist);
483}
484
485
486sub reclassify
487{
488 my ($self,$multiple_cl_ref) = @_;
489
490 # Entries in the current classify list that are "book nodes"
491 # should be recursively classified.
492 #--
493 foreach $dm_pair (@$multiple_cl_ref)
494 {
495 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
496 my $listclassobj;
497
498 # find metavalue in list of sub-classifiers
499 my $found = 0;
500 my $node_name;
501 foreach $node_name (keys %{$self->{'classifiers'}})
502 {
503 $resafe_node_name = $node_name;
504 # escape chars that mean something to perl...
505 $resafe_node_name =~ s/([\\\(\)\[\]\{\}\^\$\.\+\*\?\|])/\\$1/g;
506 if ($metavalue =~ m/^$resafe_node_name$/i)
507 {
508 my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
509
510 ## date appears to not be used in classifier call ####
511
512 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
513 {
514 my $section=$1;
515 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
516 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
517 ->classify($doc_obj, "Section=$section");
518 } else { # section level
519 # Thanks to Don Gourley for this...
520 # classify can't handle multi-level section
521 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
522 ->classify_section($section, $doc_obj, $date);
523 }
524 }
525 else
526 {
527 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
528 ->classify($doc_obj);
529 }
530
531 $found = 1;
532 last;
533 }
534 }
535
536 if (!$found)
537 {
538 my $outhandle=$self->{outhandle};
539 print $outhandle "Warning: AZCompactList::reclassify ";
540 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
541 }
542 }
543}
544
545
546
547sub get_reclassify_info
548{
549 my $self = shift (@_);
550
551 my $node_name;
552 foreach $node_name (keys %{$self->{'classifiers'}})
553 {
554 my $classifyinfo
555 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
556 ->get_classify_info();
557 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
558 = $classifyinfo;
559 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
560 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
561 }
562}
563
564
565sub alpha_numeric_cmp
566{
567 my ($self,$a,$b) = @_;
568
569 my $title_a = $self->{'reclassifylist'}->{$a};
570 my $title_b = $self->{'reclassifylist'}->{$b};
571
572 if ($title_a =~ m/^(\d+(\.\d+)?)/)
573 {
574 my $val_a = $1;
575 if ($title_b =~ m/^(\d+(\.\d+)?)/)
576 {
577 my $val_b = $1;
578 if ($val_a != $val_b)
579 {
580 return ($val_a <=> $val_b);
581 }
582 }
583 }
584
585 return ($title_a cmp $title_b);
586}
587
588sub frequency_cmp
589{
590 my ($self,$a,$b) = @_;
591
592
593 my $title_a = $self->{'reclassifylist'}->{$a};
594 my $title_b = $self->{'reclassifylist'}->{$b};
595
596 my $a_freq = 1;
597 my $b_freq = 1;
598
599 if ($a =~ m/^CLASSIFY\.(.*)$/)
600 {
601 my $a_node = $1;
602 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
603 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
604 }
605
606 if ($b =~ m/^CLASSIFY\.(.*)$/)
607 {
608 my $b_node = $1;
609 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
610 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
611 }
612
613 return $b_freq <=> $a_freq;
614}
615
616sub get_classify_info {
617 my $self = shift (@_);
618
619 my @classlist =keys %{$self->{'list'}}; # list all doc oids
620
621 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
622 $self->reclassify($multiple_cl_ref);
623 $self->get_reclassify_info();
624
625 my @reclassified_classlist;
626 if ($self->{'freqsort'})
627 {
628 @reclassified_classlist
629 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
630 # supress sub-grouping by alphabet
631 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
632 }
633 else
634 {
635# @reclassified_classlist
636# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
637
638 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
639
640 @reclassified_classlist
641 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
642
643 }
644
645 return $self->splitlist (\@reclassified_classlist);
646}
647
648sub get_entry {
649 my $self = shift (@_);
650 my ($title, $childtype, $metaname, $thistype) = @_;
651
652 # organise into classification structure
653 my %classifyinfo = ('childtype'=>$childtype,
654 'Title'=>$title,
655 'contains'=>[],
656 'mdtype'=>$metaname);
657
658 $classifyinfo{'thistype'} = $thistype
659 if defined $thistype && $thistype =~ /\w/;
660
661 return \%classifyinfo;
662}
663
664
665
666# splitlist takes an ordered list of classifications (@$classlistref) and
667# splits it up into alphabetical sub-sections.
668sub splitlist {
669 my $self = shift (@_);
670 my ($classlistref) = @_;
671 my $classhash = {};
672
673 # top level
674 my @metanames = split("/",$self->{'metaname'});
675 my $metaname = shift(@metanames);
676
677 my $childtype = "HList";
678 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
679
680 my $classifyinfo;
681 if (!defined($self->{'recopt'}))
682 {
683 my $title = $self->{'title'}; # should always be defined by now....
684 $title = $metaname unless defined $title;
685 $classifyinfo
686 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
687 }
688 else
689 {
690 my $title = $self->{'title'};
691 $classifyinfo
692 = $self->get_entry ($title, $childtype, $metaname, "VList");
693 }
694
695 # don't need to do any splitting if there are less than 'minnesting' classifications
696 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
697 foreach $subOID (@$classlistref) {
698 if ($subOID =~ /^CLASSIFY\.(.*)$/
699 && defined $self->{'classifiers'}->{$1})
700 {
701 push (@{$classifyinfo->{'contains'}},
702 $self->{'classifiers'}->{$1}->{'classifyinfo'});
703 }
704 else
705 {
706 $subOID =~ s/^Metavalue_(\d+)\.//;
707 my $metaname_offset = $1 -1;
708 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
709 push (@{$classifyinfo->{'contains'}}, $oid_rec);
710 }
711 }
712 return $classifyinfo;
713 }
714
715 # first split up the list into separate A-Z and 0-9 classifications
716 foreach $classification (@$classlistref) {
717 my $title = $self->{'reclassifylist'}->{$classification};
718 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
719### $title =~ s/^\s+//g; # remove a leading spaces
720### $title =~ s/^_+//g; # remove a leading underscores
721 $title =~ s/^\W+//g;
722### $title =~ s/^(\'|\`|\"|\:|\()//g; # remove any opening punctutation
723
724# only want first character for classification
725 $title =~ m/^(.)/; $title=$1;
726 $title =~ tr/[a-z]/[A-Z]/;
727
728 if ($title =~ /^[0-9]$/) {$title = '0-9';}
729 elsif ($title !~ /^[A-Z]$/) {
730 my $outhandle = $self->{'outhandle'};
731 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
732 }
733 $classhash->{$title} = [] unless defined $classhash->{$title};
734 push (@{$classhash->{$title}}, $classification);
735 }
736 $classhash = $self->compactlist ($classhash);
737
738 my @tmparr = ();
739 foreach $subsection (sort keys (%$classhash)) {
740 push (@tmparr, $subsection);
741 }
742
743 # if there's a 0-9 section it will have been sorted to the beginning
744 # but we want it at the end
745 if ($tmparr[0] eq '0-9') {
746 shift @tmparr;
747 push (@tmparr, '0-9');
748 }
749
750 foreach $subclass (@tmparr)
751 {
752 my $tempclassify
753 = (scalar(@tmparr)==1)
754 ? ($self->get_entry(" ", "VList", $metaname))
755 : ($self->get_entry($subclass, "VList", $metaname));
756
757
758 foreach $subsubOID (@{$classhash->{$subclass}})
759 {
760 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
761 && defined $self->{'classifiers'}->{$1})
762 {
763 push (@{$tempclassify->{'contains'}},
764 $self->{'classifiers'}->{$1}->{'classifyinfo'});
765 }
766 else
767 {
768 $subsubOID =~ s/^Metavalue_(\d+)\.//;
769 my $metaname_offset = $1 -1;
770 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
771 push (@{$tempclassify->{'contains'}}, $oid_rec);
772 }
773 }
774 push (@{$classifyinfo->{'contains'}}, $tempclassify);
775 }
776
777 return $classifyinfo;
778}
779
780sub compactlist {
781 my $self = shift (@_);
782 my ($classhashref) = @_;
783 my $compactedhash = {};
784 my @currentOIDs = ();
785 my $currentfirstletter = "";
786 my $currentlastletter = "";
787 my $lastkey = "";
788
789 # minimum and maximum documents to be displayed per page.
790 # the actual maximum will be max + (min-1).
791 # the smallest sub-section is a single letter at present
792 # so in this case there may be many times max documents
793 # displayed on a page.
794 my $min = $self->{'mincompact'};
795 my $max = $self->{'maxcompact'};
796
797 foreach $subsection (sort keys %$classhashref) {
798 if ($subsection eq '0-9') {
799 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
800 next;
801 }
802 $currentfirstletter = $subsection if $currentfirstletter eq "";
803 if ((scalar (@currentOIDs) < $min) ||
804 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
805 push (@currentOIDs, @{$classhashref->{$subsection}});
806 $currentlastletter = $subsection;
807 } else {
808
809 if ($currentfirstletter eq $currentlastletter) {
810 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
811 $lastkey = $currentfirstletter;
812 } else {
813 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
814 $lastkey = "$currentfirstletter-$currentlastletter";
815 }
816 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
817 $compactedhash->{$subsection} = $classhashref->{$subsection};
818 @currentOIDs = ();
819 $currentfirstletter = "";
820 } else {
821 @currentOIDs = @{$classhashref->{$subsection}};
822 $currentfirstletter = $subsection;
823 $currentlastletter = $subsection;
824 }
825 }
826 }
827
828 # add final OIDs to last sub-classification if there aren't many otherwise
829 # add final sub-classification
830 if (scalar (@currentOIDs) < $min) {
831 my ($newkey) = $lastkey =~ /^(.)/;
832 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
833 delete $compactedhash->{$lastkey};
834 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
835 } else {
836 if ($currentfirstletter eq $currentlastletter) {
837 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
838 } else {
839 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
840 }
841 }
842
843 return $compactedhash;
844}
845
8461;
847
848
Note: See TracBrowser for help on using the repository browser.