source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

  • Property svn:keywords set to Author Date Id Revision
File size: 22.8 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25# classifier plugin for sorting alphabetically
26# options are:
27
28package AZCompactList;
29
30use BasClas;
31use sorttools;
32
33sub BEGIN {
34 @ISA = ('BasClas');
35}
36
37my $doclevel_list =
38 [ { 'name' => "top",
39 'desc' => "{AZCompactList.doclevel.top}" },
40 { 'name' => "section",
41 'desc' => "{AZCompactList.doclevel.section}" } ];
42
43my $arguments =
44 [ { 'name' => "metadata",
45 'desc' => "{AZCompactList.metadata}",
46 'type' => "metadata",
47 'reqd' => "yes" },
48 { 'name' => "firstvalueonly",
49 'desc' => "{AZCompactList.firstvalueonly}",
50 'type' => "flag",
51 'reqd' => "no" },
52 { 'name' => "allvalues",
53 'desc' => "{AZCompactList.allvalues}",
54 'type' => "flag",
55 'reqd' => "no" },
56 { 'name' => "buttonname",
57 'desc' => "{BasClas.buttonname}",
58 'type' => "string",
59 'deft' => "",
60 'reqd' => "no" },
61 { 'name' => "sort",
62 'desc' => "{AZCompactList.sort}",
63 'type' => "string",
64# 'deft' => "Title",
65 'reqd' => "no" },
66 { 'name' => "removeprefix",
67 'desc' => "{BasClas.removeprefix}",
68 'type' => "regexp",
69 'deft' => "",
70 'reqd' => "no" },
71 { 'name' => "removesuffix",
72 'desc' => "{BasClas.removesuffix}",
73 'type' => "regexp",
74 'deft' => "",
75 'reqd' => "no" },
76 { 'name' => "mingroup",
77 'desc' => "{AZCompactList.mingroup}",
78 'type' => "int",
79 'deft' => "2",
80 'range' => "1,",
81 'reqd' => "no" },
82 { 'name' => "minnesting",
83 'desc' => "{AZCompactList.minnesting}",
84 'type' => "int",
85 'deft' => "20",
86 'range' => "2,",
87 'reqd' => "no" },
88 { 'name' => "mincompact",
89 'desc' => "{AZCompactList.mincompact}",
90 'type' => "int",
91 'deft' => "10",
92 'range' => "1,",
93 'reqd' => "no" },
94 { 'name' => "maxcompact",
95 'desc' => "{AZCompactList.maxcompact}",
96 'type' => "int",
97 'deft' => "30",
98 'range' => "1,",
99 'reqd' => "no" },
100 { 'name' => "doclevel",
101 'desc' => "{AZCompactList.doclevel}",
102 'type' => "enum",
103 'list' => $doclevel_list,
104 'deft' => "top",
105 'reqd' => "no" },
106 { 'name' => "freqsort",
107 'desc' => "{AZCompactList.freqsort}",
108 'type' => "flag"},
109 { 'name' => "recopt",
110 'desc' => "{AZCompactList.recopt}",
111 'type' => "flag",
112 'reqd' => "no" } ];
113
114my $options =
115{ 'name' => "AZCompactList",
116 'desc' => "{AZCompactList.desc}",
117 'abstract' => "no",
118 'inherits' => "yes",
119 'args' => $arguments };
120
121
122sub new {
123 my ($class) = shift (@_);
124 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
125 push(@$pluginlist, $class);
126
127 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
128 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
129
130 my $self = (defined $hashArgOptLists)? new BasClas($pluginlist,$inputargs,$hashArgOptLists): new BasClas($pluginlist,$inputargs);
131
132 if (!$self->{"metadata"}) {
133 my $outhandle = $self->{'outhandle'};
134 print $outhandle "AZCompactList Error: required option -metadata not supplied\n";
135 $self->print_txt_usage("");
136 die "AZCompactList Error: required option -metadata not supplied\n";
137 }
138
139 # Manually set $self parameters.
140 $self->{'list'} = {};
141 $self->{'listmetavalue'} = {};
142 $self->{'list_mvpair'} = {};
143 $self->{'reclassify'} = {};
144 $self->{'reclassifylist'} = {};
145
146 $self->{'buttonname'} = $self->generate_title_from_metadata($self->{'metadata'}) unless ($self->{'buttonname'});
147
148 if (defined($self->{"removeprefix"}) && $self->{"removeprefix"}) {
149 $self->{"removeprefix"} =~ s/^\^//; # don't need a leading ^
150 }
151 if (defined($self->{"removesuffix"}) && $self->{"removesuffix"}) {
152 $self->{"removesuffix"} =~ s/\$$//; # don't need a trailing $
153 }
154
155 $self->{'recopt'} = ($self->{'recopt'} == 0) ? undef : "on";
156
157 # Clean out the unused keys
158 if($self->{"removeprefix"} eq "") {delete $self->{"removeprefix"};}
159 if($self->{"removesuffix"} eq "") {delete $self->{"removesuffix"};}
160
161 return bless $self, $class;
162}
163
164sub init
165{
166 my $self = shift (@_);
167
168 $self->{'list'} = {};
169 $self->{'listmetavalue'} = {};
170 $self->{'list_mvpair'} = {};
171 $self->{'reclassify'} = {};
172 $self->{'reclassifylist'} = {};
173}
174
175$tmp = 0;
176
177sub classify
178{
179 my $self = shift (@_);
180 my ($doc_obj) = @_;
181
182 my $doc_OID = $doc_obj->get_OID();
183
184 my @sectionlist = ();
185 my $topsection = $doc_obj->get_top_section();
186 my $metaname = $self->{'metadata'};
187 my $outhandle = $self->{'outhandle'};
188
189 $metaname =~ s/(\/|\|).*//; # grab first name in n1/n2/n3 or n1|n2|n3 list
190 my @commameta_list = split(/,/, $metaname);
191
192 if ($self->{'doclevel'} =~ /^top(level)?/i)
193 {
194 push(@sectionlist,$topsection);
195 }
196 else
197 {
198 my $thissection = $doc_obj->get_next_section($topsection);
199 while (defined $thissection)
200 {
201 push(@sectionlist,$thissection);
202 $thissection = $doc_obj->get_next_section ($thissection);
203 }
204 }
205
206 my $thissection;
207 foreach $thissection (@sectionlist)
208 {
209 my $full_doc_OID
210 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
211 if (defined $self->{'list_mvpair'}->{$full_doc_OID})
212 {
213 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
214 }
215 $self->{'list'}->{$full_doc_OID} = [];
216 $self->{'listmetavalue'}->{$full_doc_OID} = [];
217 $self->{'list_mvpair'}->{$full_doc_OID} = [];
218
219 my $metavalues = [];
220 foreach my $cmn (@commameta_list) {
221 my $cmvalues = $doc_obj->get_metadata($thissection,$cmn);
222 push(@$metavalues,@$cmvalues) if (@{$cmvalues});
223 last if (@{$cmvalues} && !$self->{'allvalues'});
224 }
225
226 my $metavalue;
227 foreach $metavalue (@$metavalues)
228 {
229 # Tidy up use of white space in metavalue for classifying
230 $metavalue =~ s/^\s*//s;
231 $metavalue =~ s/\s*$//s;
232 $metavalue =~ s/\n/ /s;
233 $metavalue =~ s/\s{2,}/ /s;
234
235 # if this document doesn't contain the metadata element we're
236 # sorting by we won't include it in this classification
237 if (defined $metavalue && $metavalue =~ /\w/)
238 {
239 if (defined($self->{'removeprefix'}) &&
240 length($self->{'removeprefix'})) {
241 $metavalue =~ s/^$self->{'removeprefix'}//;
242
243 # check that it's not now empty
244 if (!$metavalue) {next;}
245 }
246
247 if (defined($self->{'removesuffix'}) &&
248 length($self->{'removesuffix'})) {
249 $metavalue =~ s/$self->{'removesuffix'}$//;
250
251 # check that it's not now empty
252 if (!$metavalue) {next;}
253 }
254
255 my $formatted_metavalue =
256 sorttools::format_metadata_for_sorting($self->{'metadata'},
257 $metavalue, $doc_obj);
258
259 #### prefix-str
260 if (! defined($formatted_metavalue)) {
261 print $outhandle "Warning: AZCompactList: metavalue is ";
262 print $outhandle "empty\n";
263 $formatted_metavalue="";
264 }
265
266 my $mv_pair = { 'mv' => $metavalue, 'fmv' => $formatted_metavalue };
267 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
268 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
269 push(@{$self->{'list_mvpair'}->{$full_doc_OID}},$mv_pair);
270
271
272 last if ($self->{'firstvalueonly'});
273 }
274 }
275
276 # This is used in reclassify below for AZCompactSectionList
277 my $sortmeta = $doc_obj->get_metadata_element($thissection, $self->{'sort'});
278 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$sortmeta];
279 }
280}
281
282sub reinit
283{
284 my ($self,$classlist_ref) = @_;
285 my $outhandle = $self->{'outhandle'};
286
287 my %mtfreq = ();
288 my @single_classlist = ();
289 my @multiple_classlist = ();
290
291 # find out how often each metavalue occurs
292 map
293 {
294 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}} )
295 {
296### print STDERR "*** plain mv = $mvp->{'mv'}\n";
297### print STDERR "*** format mv = $mvp->{'fmv'}\n";
298
299 my $metavalue = $mvp->{'mv'};
300 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
301 $mtfreq{$metavalue}++;
302 }
303 } @$classlist_ref;
304
305 # use this information to split the list: single metavalue/repeated value
306 map
307 {
308 my $i = 1;
309 my $metavalue;
310 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}})
311 {
312 my $metavalue = $mvp->{'mv'};
313 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
314 my $cs_metavalue = $mvp->{'mv'}; # case sensative
315 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
316 {
317### print STDERR "*** pushing on $cs_metavalue\n";
318 push(@multiple_classlist,[$_,$i,$metavalue]);
319 }
320 else
321 {
322 push(@single_classlist,[$_,$cs_metavalue]);
323 $metavalue =~ tr/[A-Z]/[a-z]/;
324 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
325 }
326 $i++;
327 }
328 } @$classlist_ref;
329
330
331 # Setup sub-classifiers for multiple list
332
333 $self->{'classifiers'} = {};
334
335 my $pm;
336 foreach $pm ("List", "SectionList")
337 {
338 my $listname
339 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
340 if (-e $listname) { require $listname; }
341 else
342 {
343 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
344 die "\n";
345 }
346 }
347
348 # Create classifiers objects for each entry >= mingroup
349 my $metavalue;
350 my $doclevel = $self->{'doclevel'};
351 my $mingroup = $self->{'mingroup'};
352 my @metaname_list = split(/\/|\|/,$self->{'metadata'});
353 my $metaname = shift(@metaname_list);
354 my $hierarchical = 0;
355 if (scalar(@metaname_list) > 1) {
356 $hierarchical = 1;
357 $metaname = join('/',@metaname_list);
358 }
359 foreach $metavalue (keys %mtfreq)
360 {
361 if ($mtfreq{$metavalue} >= $mingroup)
362 {
363 # occurs more often than minimum required to compact into a group
364 my $listclassobj;
365
366 if (!$hierarchical)
367 {
368 my @args;
369 push @args, ("-metadata", "$metaname");
370 # buttonname is also used for the node's title
371 push @args, ("-buttonname", "$metavalue");
372 push @args, ("-sort", $self->{'sort'});
373
374 my $ptArgs = \@args;
375 if ($doclevel =~ m/^top(level)?/i)
376 {
377 eval ("\$listclassobj = new List([],\$ptArgs)");
378 }
379 else
380 {
381 eval ("\$listclassobj = new SectionList([],\$ptArgs)");
382 }
383 }
384 else
385 {
386
387 my @args;
388 push @args, ("-metadata", "$metaname");
389 # buttonname is also used for the node's title
390 push @args, ("-buttonname", "$metavalue");
391 push @args, ("-sort", $self->{'sort'});
392
393 if (defined $self->{'removeprefix'}) {
394 push @args, ("-removeprefix", $self->{'removeprefix'});
395 }
396 if (defined $self->{'removesuffix'}) {
397 push @args, ("-removesuffix", $self->{'removesuffix'});
398 }
399
400 push @args, ("-doclevel", "$doclevel");
401 push @args, ("-mingroup", $mingroup);
402 push @args, "-recopt ";
403
404 my $ptArgs = \@args;
405 eval ("\$listclassobj = new AZCompactList([],\$ptArgs)");
406 }
407
408 if ($@) {
409 print $outhandle "$@";
410 die "\n";
411 }
412
413 $listclassobj->init();
414
415 if (defined $metavalue && $metavalue =~ /\w/)
416 {
417 my $formatted_node = $metavalue;
418
419 if (defined($self->{'removeprefix'}) &&
420 length($self->{'removeprefix'})) {
421 $formatted_node =~ s/^$self->{'removeprefix'}//;
422 # check that it's not now empty
423 if (!$formatted_node) {next;}
424 }
425 if (defined($self->{'removesuffix'}) &&
426 length($self->{'removesuffix'})) {
427 $formatted_node =~ s/$self->{'removesuffix'}$//;
428 # check that it's not now empty
429 if (!$formatted_node) {next;}
430 }
431
432 $formatted_node = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $formatted_node);
433
434 # In case our formatted string is empty...
435 if (! defined($formatted_node)) {
436 print $outhandle "Warning: AZCompactList: metavalue is ";
437 print $outhandle "empty\n";
438 $formatted_node="";
439 }
440
441 # use the lower case, for speed of lookup.
442 my $meta_lc=lc($metavalue);
443 $self->{'classifiers'}->{$meta_lc}
444 = { 'classifyobj' => $listclassobj,
445 'formattednode' => $formatted_node };
446 }
447 }
448 }
449
450
451 return (\@single_classlist,\@multiple_classlist);
452}
453
454
455sub reclassify
456{
457 my ($self,$multiple_cl_ref) = @_;
458
459 # Entries in the current classify list that are "book nodes"
460 # should be recursively classified.
461 #--
462 foreach $dm_pair (@$multiple_cl_ref)
463 {
464 my ($doc_OID,$mdoffset,$metavalue,$cs_metavalue) = @$dm_pair;
465 my $listclassobj;
466
467 # find metavalue in list of sub-classifiers
468 # check if we have a key (lower case) for this metadata value
469 my $node_name=lc($metavalue);
470 if (exists $self->{'classifiers'}->{$node_name})
471 {
472 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
473
474 # record the metadata value offset temporarily, so eg AZList can
475 # get the correct metadata value (for multi-valued metadata fields)
476 $doc_obj->{'mdoffset'}=$mdoffset;
477
478 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
479 {
480 my $section=$1;
481 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
482 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
483 ->classify($doc_obj, "Section=$section");
484 } else { # section level
485 # Thanks to Don Gourley for this...
486 # classify can't handle multi-level section
487 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
488 ->classify_section($section, $doc_obj, $sortmeta);
489 }
490 }
491 else
492 {
493 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
494 ->classify($doc_obj);
495 }
496 } else { # this key is not in the hash
497 my $outhandle=$self->{outhandle};
498 print $outhandle "Warning: AZCompactList::reclassify ";
499 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
500 }
501 }
502}
503
504
505
506sub get_reclassify_info
507{
508 my $self = shift (@_);
509
510 my $node_name;
511 foreach $node_name (keys %{$self->{'classifiers'}})
512 {
513 my $classifyinfo
514 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
515 ->get_classify_info();
516 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
517 = $classifyinfo;
518 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
519 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
520 }
521}
522
523
524sub alpha_numeric_cmp
525{
526 my ($self,$a,$b) = @_;
527
528 my $title_a = $self->{'reclassifylist'}->{$a};
529 my $title_b = $self->{'reclassifylist'}->{$b};
530
531 if ($title_a =~ m/^(\d+(\.\d+)?)/)
532 {
533 my $val_a = $1;
534 if ($title_b =~ m/^(\d+(\.\d+)?)/)
535 {
536 my $val_b = $1;
537 if ($val_a != $val_b)
538 {
539 return ($val_a <=> $val_b);
540 }
541 }
542 }
543
544 return ($title_a cmp $title_b);
545}
546
547sub frequency_cmp
548{
549 my ($self,$a,$b) = @_;
550
551
552 my $title_a = $self->{'reclassifylist'}->{$a};
553 my $title_b = $self->{'reclassifylist'}->{$b};
554
555 my $a_freq = 1;
556 my $b_freq = 1;
557
558 if ($a =~ m/^CLASSIFY\.(.*)$/)
559 {
560 my $a_node = $1;
561 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
562 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
563 }
564
565 if ($b =~ m/^CLASSIFY\.(.*)$/)
566 {
567 my $b_node = $1;
568 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
569 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
570 }
571
572 return $b_freq <=> $a_freq;
573}
574
575sub get_classify_info {
576 my $self = shift (@_);
577
578 my @classlist =keys %{$self->{'list_mvpair'}}; # list all doc oids
579
580 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
581 $self->reclassify($multiple_cl_ref);
582 $self->get_reclassify_info();
583
584 my @reclassified_classlist;
585 if ($self->{'freqsort'})
586 {
587 @reclassified_classlist
588 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
589 # supress sub-grouping by alphabet
590 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
591 }
592 else
593 {
594# @reclassified_classlist
595# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
596
597 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
598
599 @reclassified_classlist
600 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
601
602 }
603
604 return $self->splitlist (\@reclassified_classlist);
605}
606
607sub get_entry {
608 my $self = shift (@_);
609 my ($title, $childtype, $metaname, $thistype) = @_;
610 # organise into classification structure
611 my %classifyinfo = ('childtype'=>$childtype,
612 'Title'=>$title,
613 'contains'=>[],
614 'mdtype'=>$metaname);
615
616 $classifyinfo{'thistype'} = $thistype
617 if defined $thistype && $thistype =~ /\w/;
618
619 return \%classifyinfo;
620}
621
622
623
624# splitlist takes an ordered list of classifications (@$classlistref) and
625# splits it up into alphabetical sub-sections.
626sub splitlist {
627 my $self = shift (@_);
628 my ($classlistref) = @_;
629 my $classhash = {};
630
631 # top level
632 my @metanames = split(/\/|\|/,$self->{'metadata'});
633 my $metaname = shift(@metanames);
634
635 my $childtype = "HList";
636 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
637
638 my $title = $self->{'buttonname'}; # should always be defined by now.
639 my $classifyinfo;
640 if (!defined($self->{'recopt'}))
641 {
642 $classifyinfo
643 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
644 }
645 else
646 {
647 $classifyinfo
648 = $self->get_entry ($title, $childtype, $metaname, "VList");
649 }
650
651 # don't need to do any splitting if there are less than 'minnesting' classifications
652 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
653 foreach $subOID (@$classlistref) {
654 if ($subOID =~ /^CLASSIFY\.(.*)$/
655 && defined $self->{'classifiers'}->{$1})
656 {
657 push (@{$classifyinfo->{'contains'}},
658 $self->{'classifiers'}->{$1}->{'classifyinfo'});
659 }
660 else
661 {
662 $subOID =~ s/^Metavalue_(\d+)\.//;
663 my $metaname_offset = $1 -1;
664 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
665 push (@{$classifyinfo->{'contains'}}, $oid_rec);
666 }
667 }
668 return $classifyinfo;
669 }
670
671 # first split up the list into separate A-Z and 0-9 classifications
672 foreach $classification (@$classlistref) {
673 my $title = $self->{'reclassifylist'}->{$classification};
674 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
675 $title =~ s/^\W+//g; # remove leading non-word chars
676
677 # only want first character for classification
678 $title =~ m/^(.)/; # always a char, or first byte of wide char?
679 if (defined($1) && $1 ne "") {
680 $title=$1;
681 } else {
682 print STDERR "no first character found for \"$title\" - \"" .
683 $self->{'reclassifylist'}->{$classification} . "\"\n";
684 }
685 $title =~ tr/[a-z]/[A-Z]/;
686
687 if ($title =~ /^[0-9]$/) {$title = '0-9';}
688 elsif ($title !~ /^[A-Z]$/) {
689 my $outhandle = $self->{'outhandle'};
690 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
691 }
692 $classhash->{$title} = [] unless defined $classhash->{$title};
693 push (@{$classhash->{$title}}, $classification);
694 }
695 $classhash = $self->compactlist ($classhash);
696
697 my @tmparr = ();
698 foreach $subsection (sort keys (%$classhash)) {
699 push (@tmparr, $subsection);
700 }
701
702 # if there's a 0-9 section it will have been sorted to the beginning
703 # but we want it at the end
704 if ($tmparr[0] eq '0-9') {
705 shift @tmparr;
706 push (@tmparr, '0-9');
707 }
708 foreach $subclass (@tmparr)
709 {
710 my $tempclassify
711 = (scalar(@tmparr)==1)
712 ? ($self->get_entry(" ", "VList", $metaname))
713 : ($self->get_entry($subclass, "VList", $metaname));
714
715
716 foreach $subsubOID (@{$classhash->{$subclass}})
717 {
718 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
719 && defined $self->{'classifiers'}->{$1})
720 {
721 # this is a "bookshelf" node... >1 entry compacted
722 push (@{$tempclassify->{'contains'}},
723 $self->{'classifiers'}->{$1}->{'classifyinfo'});
724 # set the metadata field name, for mdoffset to work
725 $self->{'classifiers'}->{$1}->{'classifyinfo'}->{'mdtype'}=
726 $metaname;
727 }
728 else
729 {
730 $subsubOID =~ s/^Metavalue_(\d+)\.//;
731 # record the offset if this metadata type has multiple values
732 my $metaname_offset = $1 -1;
733 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
734 push (@{$tempclassify->{'contains'}}, $oid_rec);
735 }
736 }
737 push (@{$classifyinfo->{'contains'}}, $tempclassify);
738 }
739
740 return $classifyinfo;
741}
742
743sub compactlist {
744 my $self = shift (@_);
745 my ($classhashref) = @_;
746 my $compactedhash = {};
747 my @currentOIDs = ();
748 my $currentfirstletter = "";
749 my $currentlastletter = "";
750 my $lastkey = "";
751
752 # minimum and maximum documents to be displayed per page.
753 # the actual maximum will be max + (min-1).
754 # the smallest sub-section is a single letter at present
755 # so in this case there may be many times max documents
756 # displayed on a page.
757 my $min = $self->{'mincompact'};
758 my $max = $self->{'maxcompact'};
759
760 foreach $subsection (sort keys %$classhashref) {
761 if ($subsection eq '0-9') {
762 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
763 next;
764 }
765 $currentfirstletter = $subsection if $currentfirstletter eq "";
766 if ((scalar (@currentOIDs) < $min) ||
767 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
768 push (@currentOIDs, @{$classhashref->{$subsection}});
769 $currentlastletter = $subsection;
770 } else {
771
772 if ($currentfirstletter eq $currentlastletter) {
773 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
774 $lastkey = $currentfirstletter;
775 } else {
776 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
777 $lastkey = "$currentfirstletter-$currentlastletter";
778 }
779 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
780 $compactedhash->{$subsection} = $classhashref->{$subsection};
781 @currentOIDs = ();
782 $currentfirstletter = "";
783 $lastkey=$subsection;
784 } else {
785 @currentOIDs = @{$classhashref->{$subsection}};
786 $currentfirstletter = $subsection;
787 $currentlastletter = $subsection;
788 }
789 }
790 }
791
792 # add final OIDs to last sub-classification if there aren't many otherwise
793 # add final sub-classification
794
795 # don't add if there aren't any oids
796 if (! scalar (@currentOIDs)) {return $compactedhash;}
797
798 if (scalar (@currentOIDs) < $min) {
799 my ($newkey) = $lastkey =~ /^(.)/;
800 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
801 delete $compactedhash->{$lastkey};
802 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
803 } else {
804 if ($currentfirstletter eq $currentlastletter) {
805 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
806 } else {
807 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
808 }
809 }
810
811 return $compactedhash;
812}
813
8141;
815
816
Note: See TracBrowser for help on using the repository browser.