source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 11997

Last change on this file since 11997 was 11665, checked in by kjdon, 18 years ago

changed sort option type to metadata. it was string cos one of the values is 'nosort'. but in GLI, a metadata type gives you an editable box anyway, so metadata type is fine.

  • Property svn:keywords set to Author Date Id Revision
File size: 23.1 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25# classifier plugin for sorting alphabetically
26# options are:
27
28package AZCompactList;
29
30use BasClas;
31use sorttools;
32
33use strict;
34no strict 'refs'; # allow filehandles to be variables and viceversa
35
36sub BEGIN {
37 @AZCompactList::ISA = ('BasClas');
38}
39
40my $doclevel_list =
41 [ { 'name' => "top",
42 'desc' => "{AZCompactList.doclevel.top}" },
43 { 'name' => "section",
44 'desc' => "{AZCompactList.doclevel.section}" } ];
45
46my $arguments =
47 [ { 'name' => "metadata",
48 'desc' => "{AZCompactList.metadata}",
49 'type' => "metadata",
50 'reqd' => "yes" },
51 { 'name' => "firstvalueonly",
52 'desc' => "{AZCompactList.firstvalueonly}",
53 'type' => "flag",
54 'reqd' => "no" },
55 { 'name' => "allvalues",
56 'desc' => "{AZCompactList.allvalues}",
57 'type' => "flag",
58 'reqd' => "no" },
59 { 'name' => "sort",
60 'desc' => "{AZCompactList.sort}",
61 'type' => "metadata",
62# 'deft' => "Title",
63 'reqd' => "no" },
64 { 'name' => "removeprefix",
65 'desc' => "{BasClas.removeprefix}",
66 'type' => "regexp",
67 'deft' => "",
68 'reqd' => "no" },
69 { 'name' => "removesuffix",
70 'desc' => "{BasClas.removesuffix}",
71 'type' => "regexp",
72 'deft' => "",
73 'reqd' => "no" },
74 { 'name' => "mingroup",
75 'desc' => "{AZCompactList.mingroup}",
76 'type' => "int",
77 'deft' => "1",
78 'range' => "1,",
79 'reqd' => "no" },
80 { 'name' => "minnesting",
81 'desc' => "{AZCompactList.minnesting}",
82 'type' => "int",
83 'deft' => "20",
84 'range' => "2,",
85 'reqd' => "no" },
86 { 'name' => "mincompact",
87 'desc' => "{AZCompactList.mincompact}",
88 'type' => "int",
89 'deft' => "10",
90 'range' => "1,",
91 'reqd' => "no" },
92 { 'name' => "maxcompact",
93 'desc' => "{AZCompactList.maxcompact}",
94 'type' => "int",
95 'deft' => "30",
96 'range' => "1,",
97 'reqd' => "no" },
98 { 'name' => "doclevel",
99 'desc' => "{AZCompactList.doclevel}",
100 'type' => "enum",
101 'list' => $doclevel_list,
102 'deft' => "top",
103 'reqd' => "no" },
104 { 'name' => "freqsort",
105 'desc' => "{AZCompactList.freqsort}",
106 'type' => "flag"},
107 { 'name' => "recopt",
108 'desc' => "{AZCompactList.recopt}",
109 'type' => "flag",
110 'reqd' => "no" } ];
111
112my $options =
113{ 'name' => "AZCompactList",
114 'desc' => "{AZCompactList.desc}",
115 'abstract' => "no",
116 'inherits' => "yes",
117 'args' => $arguments };
118
119
120sub new {
121 my ($class) = shift (@_);
122 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
123 push(@$pluginlist, $class);
124
125 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
126 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
127
128 my $self = (defined $hashArgOptLists)? new BasClas($pluginlist,$inputargs,$hashArgOptLists): new BasClas($pluginlist,$inputargs);
129
130 if ($self->{'info_only'}) {
131 # don't worry about any options etc
132 return bless $self, $class;
133 }
134
135 if (!$self->{"metadata"}) {
136 my $outhandle = $self->{'outhandle'};
137 print $outhandle "AZCompactList Error: required option -metadata not supplied\n";
138 $self->print_txt_usage("");
139 die "AZCompactList Error: required option -metadata not supplied\n";
140 }
141
142 # Manually set $self parameters.
143 $self->{'list'} = {};
144 $self->{'listmetavalue'} = {};
145 $self->{'list_mvpair'} = {};
146 $self->{'reclassify'} = {};
147 $self->{'reclassifylist'} = {};
148
149 $self->{'buttonname'} = $self->generate_title_from_metadata($self->{'metadata'}) unless ($self->{'buttonname'});
150
151 if (defined($self->{"removeprefix"}) && $self->{"removeprefix"}) {
152 $self->{"removeprefix"} =~ s/^\^//; # don't need a leading ^
153 }
154 if (defined($self->{"removesuffix"}) && $self->{"removesuffix"}) {
155 $self->{"removesuffix"} =~ s/\$$//; # don't need a trailing $
156 }
157
158 $self->{'recopt'} = ($self->{'recopt'} == 0) ? undef : "on";
159
160 # Clean out the unused keys
161 if($self->{"removeprefix"} eq "") {delete $self->{"removeprefix"};}
162 if($self->{"removesuffix"} eq "") {delete $self->{"removesuffix"};}
163
164 return bless $self, $class;
165}
166
167sub init
168{
169 my $self = shift (@_);
170
171 $self->{'list'} = {};
172 $self->{'listmetavalue'} = {};
173 $self->{'list_mvpair'} = {};
174 $self->{'reclassify'} = {};
175 $self->{'reclassifylist'} = {};
176}
177
178my $tmp = 0;
179
180sub classify
181{
182 my $self = shift (@_);
183 my ($doc_obj) = @_;
184
185 my $doc_OID = $doc_obj->get_OID();
186
187 my @sectionlist = ();
188 my $topsection = $doc_obj->get_top_section();
189 my $metaname = $self->{'metadata'};
190 my $outhandle = $self->{'outhandle'};
191
192 $metaname =~ s/(\/|\|).*//; # grab first name in n1/n2/n3 or n1|n2|n3 list
193 my @commameta_list = split(/,/, $metaname);
194
195 if ($self->{'doclevel'} =~ /^top(level)?/i)
196 {
197 push(@sectionlist,$topsection);
198 }
199 else
200 {
201 my $thissection = $doc_obj->get_next_section($topsection);
202 while (defined $thissection)
203 {
204 push(@sectionlist,$thissection);
205 $thissection = $doc_obj->get_next_section ($thissection);
206 }
207 }
208
209 my $thissection;
210 foreach $thissection (@sectionlist)
211 {
212 my $full_doc_OID
213 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
214 if (defined $self->{'list_mvpair'}->{$full_doc_OID})
215 {
216 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
217 }
218 $self->{'list'}->{$full_doc_OID} = [];
219 $self->{'listmetavalue'}->{$full_doc_OID} = [];
220 $self->{'list_mvpair'}->{$full_doc_OID} = [];
221
222 my $metavalues = [];
223 foreach my $cmn (@commameta_list) {
224 my $cmvalues = $doc_obj->get_metadata($thissection,$cmn);
225 push(@$metavalues,@$cmvalues) if (@{$cmvalues});
226 last if (@{$cmvalues} && !$self->{'allvalues'});
227 }
228
229 my $metavalue;
230 foreach $metavalue (@$metavalues)
231 {
232 # Tidy up use of white space in metavalue for classifying
233 $metavalue =~ s/^\s*//s;
234 $metavalue =~ s/\s*$//s;
235 $metavalue =~ s/\n/ /s;
236 $metavalue =~ s/\s{2,}/ /s;
237
238 # if this document doesn't contain the metadata element we're
239 # sorting by we won't include it in this classification
240 if (defined $metavalue && $metavalue =~ /\w/)
241 {
242 if (defined($self->{'removeprefix'}) &&
243 length($self->{'removeprefix'})) {
244 $metavalue =~ s/^$self->{'removeprefix'}//;
245
246 # check that it's not now empty
247 if (!$metavalue) {next;}
248 }
249
250 if (defined($self->{'removesuffix'}) &&
251 length($self->{'removesuffix'})) {
252 $metavalue =~ s/$self->{'removesuffix'}$//;
253
254 # check that it's not now empty
255 if (!$metavalue) {next;}
256 }
257
258 my $formatted_metavalue;
259 if ($self->{'no_metadata_formatting'}) {
260 $formatted_metavalue = $metavalue;
261 } else {
262 $formatted_metavalue = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $metavalue, $doc_obj);
263 }
264
265 #### prefix-str
266 if (! defined($formatted_metavalue)) {
267 print $outhandle "Warning: AZCompactList: metavalue is ";
268 print $outhandle "empty\n";
269 $formatted_metavalue="";
270 }
271
272 my $mv_pair = { 'mv' => $metavalue, 'fmv' => $formatted_metavalue };
273 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
274 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
275 push(@{$self->{'list_mvpair'}->{$full_doc_OID}},$mv_pair);
276
277
278 last if ($self->{'firstvalueonly'});
279 }
280 }
281
282 # This is used in reclassify below for AZCompactSectionList
283 my $sortmeta = $doc_obj->get_metadata_element($thissection, $self->{'sort'});
284 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$sortmeta];
285 }
286}
287
288sub reinit
289{
290 my ($self,$classlist_ref) = @_;
291 my $outhandle = $self->{'outhandle'};
292
293 my %mtfreq = ();
294 my @single_classlist = ();
295 my @multiple_classlist = ();
296
297 # find out how often each metavalue occurs
298 map
299 {
300 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}} )
301 {
302### print STDERR "*** plain mv = $mvp->{'mv'}\n";
303### print STDERR "*** format mv = $mvp->{'fmv'}\n";
304
305 my $metavalue = $mvp->{'mv'};
306 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
307 $mtfreq{$metavalue}++;
308 }
309 } @$classlist_ref;
310
311 # use this information to split the list: single metavalue/repeated value
312 map
313 {
314 my $i = 1;
315 my $metavalue;
316 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}})
317 {
318 my $metavalue = $mvp->{'mv'};
319 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
320 my $cs_metavalue = $mvp->{'mv'}; # case sensative
321 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
322 {
323### print STDERR "*** pushing on $cs_metavalue\n";
324 push(@multiple_classlist,[$_,$i,$metavalue]);
325 }
326 else
327 {
328 push(@single_classlist,[$_,$cs_metavalue]);
329 $metavalue =~ tr/[A-Z]/[a-z]/;
330 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
331 }
332 $i++;
333 }
334 } @$classlist_ref;
335
336
337 # Setup sub-classifiers for multiple list
338
339 $self->{'classifiers'} = {};
340
341 my $pm;
342 foreach $pm ("List", "SectionList")
343 {
344 my $listname
345 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
346 if (-e $listname) { require $listname; }
347 else
348 {
349 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
350 die "\n";
351 }
352 }
353
354 # Create classifiers objects for each entry >= mingroup
355 my $metavalue;
356 my $doclevel = $self->{'doclevel'};
357 my $mingroup = $self->{'mingroup'};
358 my @metaname_list = split(/\/|\|/,$self->{'metadata'});
359 my $metaname = shift(@metaname_list);
360 my $hierarchical = 0;
361 if (scalar(@metaname_list) > 1) {
362 $hierarchical = 1;
363 $metaname = join('/',@metaname_list);
364 }
365 foreach $metavalue (keys %mtfreq)
366 {
367 if ($mtfreq{$metavalue} >= $mingroup)
368 {
369 # occurs more often than minimum required to compact into a group
370 my $listclassobj;
371
372 if (!$hierarchical)
373 {
374 my @args;
375 push @args, ("-metadata", "$metaname");
376 # buttonname is also used for the node's title
377 push @args, ("-buttonname", "$metavalue");
378 push @args, ("-sort", $self->{'sort'});
379
380 my $ptArgs = \@args;
381 if ($doclevel =~ m/^top(level)?/i)
382 {
383 eval ("\$listclassobj = new List([],\$ptArgs)");
384 }
385 else
386 {
387 eval ("\$listclassobj = new SectionList([],\$ptArgs)");
388 }
389 }
390 else
391 {
392
393 my @args;
394 push @args, ("-metadata", "$metaname");
395 # buttonname is also used for the node's title
396 push @args, ("-buttonname", "$metavalue");
397 push @args, ("-sort", $self->{'sort'});
398
399 if (defined $self->{'removeprefix'}) {
400 push @args, ("-removeprefix", $self->{'removeprefix'});
401 }
402 if (defined $self->{'removesuffix'}) {
403 push @args, ("-removesuffix", $self->{'removesuffix'});
404 }
405
406 push @args, ("-doclevel", "$doclevel");
407 push @args, ("-mingroup", $mingroup);
408 push @args, "-recopt ";
409
410 my $ptArgs = \@args;
411 eval ("\$listclassobj = new AZCompactList([],\$ptArgs)");
412 }
413
414 if ($@) {
415 print $outhandle "$@";
416 die "\n";
417 }
418
419 $listclassobj->init();
420
421 if (defined $metavalue && $metavalue =~ /\w/)
422 {
423 my $formatted_node = $metavalue;
424
425 if (defined($self->{'removeprefix'}) &&
426 length($self->{'removeprefix'})) {
427 $formatted_node =~ s/^$self->{'removeprefix'}//;
428 # check that it's not now empty
429 if (!$formatted_node) {next;}
430 }
431 if (defined($self->{'removesuffix'}) &&
432 length($self->{'removesuffix'})) {
433 $formatted_node =~ s/$self->{'removesuffix'}$//;
434 # check that it's not now empty
435 if (!$formatted_node) {next;}
436 }
437
438 $formatted_node = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $formatted_node) unless $self->{'no_metadata_formatting'};
439
440 # In case our formatted string is empty...
441 if (! defined($formatted_node)) {
442 print $outhandle "Warning: AZCompactList: metavalue is ";
443 print $outhandle "empty\n";
444 $formatted_node="";
445 }
446
447 # use the lower case, for speed of lookup.
448 my $meta_lc=lc($metavalue);
449 $self->{'classifiers'}->{$meta_lc}
450 = { 'classifyobj' => $listclassobj,
451 'formattednode' => $formatted_node };
452 }
453 }
454 }
455
456
457 return (\@single_classlist,\@multiple_classlist);
458}
459
460
461sub reclassify
462{
463 my ($self,$multiple_cl_ref) = @_;
464
465 # Entries in the current classify list that are "book nodes"
466 # should be recursively classified.
467 #--
468 foreach my $dm_pair (@$multiple_cl_ref)
469 {
470 my ($doc_OID,$mdoffset,$metavalue,$cs_metavalue) = @$dm_pair;
471 my $listclassobj;
472
473 # find metavalue in list of sub-classifiers
474 # check if we have a key (lower case) for this metadata value
475 my $node_name=lc($metavalue);
476 if (exists $self->{'classifiers'}->{$node_name})
477 {
478 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
479
480 # record the metadata value offset temporarily, so eg AZList can
481 # get the correct metadata value (for multi-valued metadata fields)
482 $doc_obj->{'mdoffset'}=$mdoffset;
483
484 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
485 {
486 my $section=$1;
487 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
488 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
489 ->classify($doc_obj, "Section=$section");
490 } else { # section level
491 # Thanks to Don Gourley for this...
492 # classify can't handle multi-level section
493 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
494 ->classify_section($section, $doc_obj, $sortmeta);
495 }
496 }
497 else
498 {
499 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
500 ->classify($doc_obj);
501 }
502 } else { # this key is not in the hash
503 my $outhandle=$self->{outhandle};
504 print $outhandle "Warning: AZCompactList::reclassify ";
505 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
506 }
507 }
508}
509
510
511
512sub get_reclassify_info
513{
514 my $self = shift (@_);
515
516 my $node_name;
517 foreach $node_name (keys %{$self->{'classifiers'}})
518 {
519 my $classifyinfo
520 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
521 ->get_classify_info();
522 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
523 = $classifyinfo;
524 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
525 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
526 }
527}
528
529
530sub alpha_numeric_cmp
531{
532 my ($self,$a,$b) = @_;
533
534 my $title_a = $self->{'reclassifylist'}->{$a};
535 my $title_b = $self->{'reclassifylist'}->{$b};
536
537 if ($title_a =~ m/^(\d+(\.\d+)?)/)
538 {
539 my $val_a = $1;
540 if ($title_b =~ m/^(\d+(\.\d+)?)/)
541 {
542 my $val_b = $1;
543 if ($val_a != $val_b)
544 {
545 return ($val_a <=> $val_b);
546 }
547 }
548 }
549
550 return ($title_a cmp $title_b);
551}
552
553sub frequency_cmp
554{
555 my ($self,$a,$b) = @_;
556
557
558 my $title_a = $self->{'reclassifylist'}->{$a};
559 my $title_b = $self->{'reclassifylist'}->{$b};
560
561 my $a_freq = 1;
562 my $b_freq = 1;
563
564 if ($a =~ m/^CLASSIFY\.(.*)$/)
565 {
566 my $a_node = $1;
567 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
568 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
569 }
570
571 if ($b =~ m/^CLASSIFY\.(.*)$/)
572 {
573 my $b_node = $1;
574 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
575 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
576 }
577
578 return $b_freq <=> $a_freq;
579}
580
581sub get_classify_info {
582 my $self = shift (@_);
583
584 my @classlist =keys %{$self->{'list_mvpair'}}; # list all doc oids
585
586 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
587 $self->reclassify($multiple_cl_ref);
588 $self->get_reclassify_info();
589
590 my @reclassified_classlist;
591 if ($self->{'freqsort'})
592 {
593 @reclassified_classlist
594 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
595 # supress sub-grouping by alphabet
596 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
597 }
598 else
599 {
600# @reclassified_classlist
601# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
602
603 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
604
605 @reclassified_classlist
606 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
607
608 }
609
610 return $self->splitlist (\@reclassified_classlist);
611}
612
613sub get_entry {
614 my $self = shift (@_);
615 my ($title, $childtype, $metaname, $thistype) = @_;
616 # organise into classification structure
617 my %classifyinfo = ('childtype'=>$childtype,
618 'Title'=>$title,
619 'contains'=>[],
620 'mdtype'=>$metaname);
621
622 $classifyinfo{'thistype'} = $thistype
623 if defined $thistype && $thistype =~ /\w/;
624
625 return \%classifyinfo;
626}
627
628
629
630# splitlist takes an ordered list of classifications (@$classlistref) and
631# splits it up into alphabetical sub-sections.
632sub splitlist {
633 my $self = shift (@_);
634 my ($classlistref) = @_;
635 my $classhash = {};
636
637 # top level
638 my @metanames = split(/\/|\|/,$self->{'metadata'});
639 my $metaname = shift(@metanames);
640
641 my $childtype = "HList";
642 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
643
644 my $title = $self->{'buttonname'}; # should always be defined by now.
645 my $classifyinfo;
646 if (!defined($self->{'recopt'}))
647 {
648 $classifyinfo
649 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
650 }
651 else
652 {
653 $classifyinfo
654 = $self->get_entry ($title, $childtype, $metaname, "VList");
655 }
656
657 # don't need to do any splitting if there are less than 'minnesting' classifications
658 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
659 foreach my $subOID (@$classlistref) {
660 if ($subOID =~ /^CLASSIFY\.(.*)$/
661 && defined $self->{'classifiers'}->{$1})
662 {
663 push (@{$classifyinfo->{'contains'}},
664 $self->{'classifiers'}->{$1}->{'classifyinfo'});
665 }
666 else
667 {
668 $subOID =~ s/^Metavalue_(\d+)\.//;
669 my $metaname_offset = $1 -1;
670 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
671 push (@{$classifyinfo->{'contains'}}, $oid_rec);
672 }
673 }
674 return $classifyinfo;
675 }
676
677 # first split up the list into separate A-Z and 0-9 classifications
678 foreach my $classification (@$classlistref) {
679 my $title = $self->{'reclassifylist'}->{$classification};
680 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
681 $title =~ s/^\W+//g; # remove leading non-word chars
682
683 # only want first character for classification
684 $title =~ m/^(.)/; # always a char, or first byte of wide char?
685 if (defined($1) && $1 ne "") {
686 $title=$1;
687 } else {
688 print STDERR "no first character found for \"$title\" - \"" .
689 $self->{'reclassifylist'}->{$classification} . "\"\n";
690 }
691 $title =~ tr/[a-z]/[A-Z]/;
692
693 if ($title =~ /^[0-9]$/) {$title = '0-9';}
694 elsif ($title !~ /^[A-Z]$/) {
695 my $outhandle = $self->{'outhandle'};
696 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
697 }
698 $classhash->{$title} = [] unless defined $classhash->{$title};
699 push (@{$classhash->{$title}}, $classification);
700 }
701 $classhash = $self->compactlist ($classhash);
702
703 my @tmparr = ();
704 foreach my $subsection (sort keys (%$classhash)) {
705 push (@tmparr, $subsection);
706 }
707
708 # if there's a 0-9 section it will have been sorted to the beginning
709 # but we want it at the end
710 if ($tmparr[0] eq '0-9') {
711 shift @tmparr;
712 push (@tmparr, '0-9');
713 }
714 foreach my $subclass (@tmparr)
715 {
716 my $tempclassify
717 = (scalar(@tmparr)==1)
718 ? ($self->get_entry(" ", "VList", $metaname))
719 : ($self->get_entry($subclass, "VList", $metaname));
720
721
722 foreach my $subsubOID (@{$classhash->{$subclass}})
723 {
724 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
725 && defined $self->{'classifiers'}->{$1})
726 {
727 # this is a "bookshelf" node... >1 entry compacted
728 push (@{$tempclassify->{'contains'}},
729 $self->{'classifiers'}->{$1}->{'classifyinfo'});
730 # set the metadata field name, for mdoffset to work
731 $self->{'classifiers'}->{$1}->{'classifyinfo'}->{'mdtype'}=
732 $metaname;
733 }
734 else
735 {
736 $subsubOID =~ s/^Metavalue_(\d+)\.//;
737 # record the offset if this metadata type has multiple values
738 my $metaname_offset = $1 -1;
739 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
740 push (@{$tempclassify->{'contains'}}, $oid_rec);
741 }
742 }
743 push (@{$classifyinfo->{'contains'}}, $tempclassify);
744 }
745
746 return $classifyinfo;
747}
748
749sub compactlist {
750 my $self = shift (@_);
751 my ($classhashref) = @_;
752 my $compactedhash = {};
753 my @currentOIDs = ();
754 my $currentfirstletter = "";
755 my $currentlastletter = "";
756 my $lastkey = "";
757
758 # minimum and maximum documents to be displayed per page.
759 # the actual maximum will be max + (min-1).
760 # the smallest sub-section is a single letter at present
761 # so in this case there may be many times max documents
762 # displayed on a page.
763 my $min = $self->{'mincompact'};
764 my $max = $self->{'maxcompact'};
765
766 foreach my $subsection (sort keys %$classhashref) {
767 if ($subsection eq '0-9') {
768 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
769 next;
770 }
771 $currentfirstletter = $subsection if $currentfirstletter eq "";
772 if ((scalar (@currentOIDs) < $min) ||
773 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
774 push (@currentOIDs, @{$classhashref->{$subsection}});
775 $currentlastletter = $subsection;
776 } else {
777
778 if ($currentfirstletter eq $currentlastletter) {
779 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
780 $lastkey = $currentfirstletter;
781 } else {
782 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
783 $lastkey = "$currentfirstletter-$currentlastletter";
784 }
785 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
786 $compactedhash->{$subsection} = $classhashref->{$subsection};
787 @currentOIDs = ();
788 $currentfirstletter = "";
789 $lastkey=$subsection;
790 } else {
791 @currentOIDs = @{$classhashref->{$subsection}};
792 $currentfirstletter = $subsection;
793 $currentlastletter = $subsection;
794 }
795 }
796 }
797
798 # add final OIDs to last sub-classification if there aren't many otherwise
799 # add final sub-classification
800
801 # don't add if there aren't any oids
802 if (! scalar (@currentOIDs)) {return $compactedhash;}
803
804 if (scalar (@currentOIDs) < $min) {
805 my ($newkey) = $lastkey =~ /^(.)/;
806 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
807 delete $compactedhash->{$lastkey};
808 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
809 } else {
810 if ($currentfirstletter eq $currentlastletter) {
811 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
812 } else {
813 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
814 }
815 }
816
817 return $compactedhash;
818}
819
8201;
821
822
Note: See TracBrowser for help on using the repository browser.