source: gsdl/trunk/perllib/classify/AZCompactList.pm@ 19495

Last change on this file since 19495 was 19495, checked in by davidb, 15 years ago

Minor bug fix (needed to test for 'update' mode as well as 'delete' mode) to support incremental building of this classifier

  • Property svn:keywords set to Author Date Id Revision
File size: 23.4 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25# classifier plugin for sorting alphabetically
26
27package AZCompactList;
28
29use BaseClassifier;
30use sorttools;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35sub BEGIN {
36 @AZCompactList::ISA = ('BaseClassifier');
37}
38
39my $doclevel_list =
40 [ { 'name' => "top",
41 'desc' => "{AZCompactList.doclevel.top}" },
42 { 'name' => "section",
43 'desc' => "{AZCompactList.doclevel.section}" } ];
44
45my $arguments =
46 [ { 'name' => "metadata",
47 'desc' => "{AZCompactList.metadata}",
48 'type' => "metadata",
49 'reqd' => "yes" },
50 { 'name' => "firstvalueonly",
51 'desc' => "{AZCompactList.firstvalueonly}",
52 'type' => "flag",
53 'reqd' => "no" },
54 { 'name' => "allvalues",
55 'desc' => "{AZCompactList.allvalues}",
56 'type' => "flag",
57 'reqd' => "no" },
58 { 'name' => "sort",
59 'desc' => "{AZCompactList.sort}",
60 'type' => "metadata",
61# 'deft' => "Title",
62 'reqd' => "no" },
63 { 'name' => "removeprefix",
64 'desc' => "{BasClas.removeprefix}",
65 'type' => "regexp",
66 'deft' => "",
67 'reqd' => "no" },
68 { 'name' => "removesuffix",
69 'desc' => "{BasClas.removesuffix}",
70 'type' => "regexp",
71 'deft' => "",
72 'reqd' => "no" },
73 { 'name' => "mingroup",
74 'desc' => "{AZCompactList.mingroup}",
75 'type' => "int",
76 'deft' => "1",
77 'range' => "1,",
78 'reqd' => "no" },
79 { 'name' => "minnesting",
80 'desc' => "{AZCompactList.minnesting}",
81 'type' => "int",
82 'deft' => "20",
83 'range' => "2,",
84 'reqd' => "no" },
85 { 'name' => "mincompact",
86 'desc' => "{AZCompactList.mincompact}",
87 'type' => "int",
88 'deft' => "10",
89 'range' => "1,",
90 'reqd' => "no" },
91 { 'name' => "maxcompact",
92 'desc' => "{AZCompactList.maxcompact}",
93 'type' => "int",
94 'deft' => "30",
95 'range' => "1,",
96 'reqd' => "no" },
97 { 'name' => "doclevel",
98 'desc' => "{AZCompactList.doclevel}",
99 'type' => "enum",
100 'list' => $doclevel_list,
101 'deft' => "top",
102 'reqd' => "no" },
103 { 'name' => "freqsort",
104 'desc' => "{AZCompactList.freqsort}",
105 'type' => "flag"},
106 { 'name' => "recopt",
107 'desc' => "{AZCompactList.recopt}",
108 'type' => "flag",
109 'reqd' => "no" } ];
110
111my $options =
112{ 'name' => "AZCompactList",
113 'desc' => "{AZCompactList.desc}",
114 'abstract' => "no",
115 'inherits' => "yes",
116 'args' => $arguments };
117
118
119sub new {
120 my ($class) = shift (@_);
121 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
122 push(@$pluginlist, $class);
123
124 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
125 push(@{$hashArgOptLists->{"OptList"}},$options);
126
127 my $self = new BaseClassifier($pluginlist, $inputargs, $hashArgOptLists);
128
129 if ($self->{'info_only'}) {
130 # don't worry about any options etc
131 return bless $self, $class;
132 }
133
134 if (!$self->{"metadata"}) {
135 my $outhandle = $self->{'outhandle'};
136 print $outhandle "AZCompactList Error: required option -metadata not supplied\n";
137 $self->print_txt_usage("");
138 die "AZCompactList Error: required option -metadata not supplied\n";
139 }
140
141 # Manually set $self parameters.
142 $self->{'list'} = {};
143 $self->{'listmetavalue'} = {};
144 $self->{'list_mvpair'} = {};
145 $self->{'reclassify'} = {};
146 $self->{'reclassifylist'} = {};
147
148 $self->{'buttonname'} = $self->generate_title_from_metadata($self->{'metadata'}) unless ($self->{'buttonname'});
149
150 if (defined($self->{"removeprefix"}) && $self->{"removeprefix"}) {
151 $self->{"removeprefix"} =~ s/^\^//; # don't need a leading ^
152 }
153 if (defined($self->{"removesuffix"}) && $self->{"removesuffix"}) {
154 $self->{"removesuffix"} =~ s/\$$//; # don't need a trailing $
155 }
156
157 $self->{'recopt'} = ($self->{'recopt'} == 0) ? undef : "on";
158
159 # Clean out the unused keys
160 if($self->{"removeprefix"} eq "") {delete $self->{"removeprefix"};}
161 if($self->{"removesuffix"} eq "") {delete $self->{"removesuffix"};}
162
163 return bless $self, $class;
164}
165
166sub init
167{
168 my $self = shift (@_);
169
170 $self->{'list'} = {};
171 $self->{'listmetavalue'} = {};
172 $self->{'list_mvpair'} = {};
173 $self->{'reclassify'} = {};
174 $self->{'reclassifylist'} = {};
175}
176
177my $tmp = 0;
178
179sub classify
180{
181 my $self = shift (@_);
182 my ($doc_obj,$edit_mode) = @_;
183
184 my $doc_OID = $doc_obj->get_OID();
185 my $outhandle = $self->{'outhandle'};
186
187 my @sectionlist = ();
188 my $topsection = $doc_obj->get_top_section();
189 my $metaname = $self->{'metadata'};
190
191 $metaname =~ s/(\/|\|).*//; # grab first name in n1/n2/n3 or n1|n2|n3 list
192 my @commameta_list = split(/,/, $metaname);
193
194 if ($self->{'doclevel'} =~ /^top(level)?/i)
195 {
196 push(@sectionlist,$topsection);
197 }
198 else
199 {
200 my $thissection = $doc_obj->get_next_section($topsection);
201 while (defined $thissection)
202 {
203 push(@sectionlist,$thissection);
204 $thissection = $doc_obj->get_next_section ($thissection);
205 }
206 }
207
208 my $thissection;
209 foreach $thissection (@sectionlist)
210 {
211 my $full_doc_OID
212 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
213
214 if (($edit_mode eq "delete") || ($edit_mode eq "update")) {
215 print $outhandle " Deleting $full_doc_OID\n";
216 delete $self->{'list'}->{$full_doc_OID};
217 delete $self->{'listmetavalue'}->{$full_doc_OID};
218 delete $self->{'list_mvpair'}->{$full_doc_OID};
219 delete $self->{'reclassify'}->{$full_doc_OID};
220
221 next if ($edit_mode eq "delete");
222 }
223
224
225 if (defined $self->{'list_mvpair'}->{$full_doc_OID})
226 {
227 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
228 }
229 $self->{'list'}->{$full_doc_OID} = [];
230 $self->{'listmetavalue'}->{$full_doc_OID} = [];
231 $self->{'list_mvpair'}->{$full_doc_OID} = [];
232
233 my $metavalues = [];
234 foreach my $cmn (@commameta_list) {
235 my $cmvalues = $doc_obj->get_metadata($thissection,$cmn);
236 push(@$metavalues,@$cmvalues) if (@{$cmvalues});
237 last if (@{$cmvalues} && !$self->{'allvalues'});
238 }
239
240 my $metavalue;
241 foreach $metavalue (@$metavalues)
242 {
243 # Tidy up use of white space in metavalue for classifying
244 $metavalue =~ s/^\s*//s;
245 $metavalue =~ s/\s*$//s;
246 $metavalue =~ s/\n/ /s;
247 $metavalue =~ s/\s{2,}/ /s;
248
249 # if this document doesn't contain the metadata element we're
250 # sorting by we won't include it in this classification
251 if (defined $metavalue && $metavalue =~ /\w/)
252 {
253 if (defined($self->{'removeprefix'}) &&
254 length($self->{'removeprefix'})) {
255 $metavalue =~ s/^$self->{'removeprefix'}//;
256
257 # check that it's not now empty
258 if (!$metavalue) {next;}
259 }
260
261 if (defined($self->{'removesuffix'}) &&
262 length($self->{'removesuffix'})) {
263 $metavalue =~ s/$self->{'removesuffix'}$//;
264
265 # check that it's not now empty
266 if (!$metavalue) {next;}
267 }
268
269 my $formatted_metavalue;
270 if ($self->{'no_metadata_formatting'}) {
271 $formatted_metavalue = $metavalue;
272 } else {
273 $formatted_metavalue = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $metavalue, $doc_obj);
274 }
275
276 #### prefix-str
277 if (! defined($formatted_metavalue)) {
278 print $outhandle "Warning: AZCompactList: metavalue is ";
279 print $outhandle "empty\n";
280 $formatted_metavalue="";
281 }
282
283 my $mv_pair = { 'mv' => $metavalue, 'fmv' => $formatted_metavalue };
284 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
285 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
286 push(@{$self->{'list_mvpair'}->{$full_doc_OID}},$mv_pair);
287
288
289 last if ($self->{'firstvalueonly'});
290 }
291 }
292
293 # This is used in reclassify below for AZCompactSectionList
294 my $sortmeta = $doc_obj->get_metadata_element($thissection, $self->{'sort'});
295 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$sortmeta];
296 }
297}
298
299sub reinit
300{
301 my ($self,$classlist_ref) = @_;
302 my $outhandle = $self->{'outhandle'};
303
304 my %mtfreq = ();
305 my @single_classlist = ();
306 my @multiple_classlist = ();
307
308 # find out how often each metavalue occurs
309 map
310 {
311 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}} )
312 {
313### print STDERR "*** plain mv = $mvp->{'mv'}\n";
314### print STDERR "*** format mv = $mvp->{'fmv'}\n";
315
316 my $metavalue = $mvp->{'mv'};
317 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
318 $mtfreq{$metavalue}++;
319 }
320 } @$classlist_ref;
321
322 # use this information to split the list: single metavalue/repeated value
323 map
324 {
325 my $i = 1;
326 my $metavalue;
327 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}})
328 {
329 my $metavalue = $mvp->{'mv'};
330 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
331 my $cs_metavalue = $mvp->{'mv'}; # case sensative
332 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
333 {
334### print STDERR "*** pushing on $cs_metavalue\n";
335 push(@multiple_classlist,[$_,$i,$metavalue]);
336 }
337 else
338 {
339 push(@single_classlist,[$_,$cs_metavalue]);
340 $metavalue =~ tr/[A-Z]/[a-z]/;
341 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
342 }
343 $i++;
344 }
345 } @$classlist_ref;
346
347
348 # Setup sub-classifiers for multiple list
349
350 $self->{'classifiers'} = {};
351
352 my $pm;
353 foreach $pm ("SimpleList", "SectionList")
354 {
355 my $listname
356 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
357 if (-e $listname) { require $listname; }
358 else
359 {
360 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
361 die "\n";
362 }
363 }
364
365 # Create classifiers objects for each entry >= mingroup
366 my $metavalue;
367 my $doclevel = $self->{'doclevel'};
368 my $mingroup = $self->{'mingroup'};
369 my @metaname_list = split(/\/|\|/,$self->{'metadata'});
370 my $metaname = shift(@metaname_list);
371 my $hierarchical = 0;
372 if (scalar(@metaname_list) > 1) {
373 $hierarchical = 1;
374 $metaname = join('/',@metaname_list);
375 }
376 foreach $metavalue (keys %mtfreq)
377 {
378 if ($mtfreq{$metavalue} >= $mingroup)
379 {
380 # occurs more often than minimum required to compact into a group
381 my $listclassobj;
382
383 if (!$hierarchical)
384 {
385 my @args;
386 push @args, ("-metadata", "$metaname");
387 # buttonname is also used for the node's title
388 push @args, ("-buttonname", "$metavalue");
389 push @args, ("-sort", $self->{'sort'});
390
391 my $ptArgs = \@args;
392 if ($doclevel =~ m/^top(level)?/i)
393 {
394 eval ("\$listclassobj = new SimpleList([],\$ptArgs)");
395 }
396 else
397 {
398 eval ("\$listclassobj = new SectionList([],\$ptArgs)");
399 }
400 }
401 else
402 {
403
404 my @args;
405 push @args, ("-metadata", "$metaname");
406 # buttonname is also used for the node's title
407 push @args, ("-buttonname", "$metavalue");
408 push @args, ("-sort", $self->{'sort'});
409
410 if (defined $self->{'removeprefix'}) {
411 push @args, ("-removeprefix", $self->{'removeprefix'});
412 }
413 if (defined $self->{'removesuffix'}) {
414 push @args, ("-removesuffix", $self->{'removesuffix'});
415 }
416
417 push @args, ("-doclevel", "$doclevel");
418 push @args, ("-mingroup", $mingroup);
419 push @args, "-recopt ";
420
421 my $ptArgs = \@args;
422 eval ("\$listclassobj = new AZCompactList([],\$ptArgs)");
423 }
424
425 if ($@) {
426 print $outhandle "$@";
427 die "\n";
428 }
429
430 $listclassobj->init();
431
432 if (defined $metavalue && $metavalue =~ /\w/)
433 {
434 my $formatted_node = $metavalue;
435
436 if (defined($self->{'removeprefix'}) &&
437 length($self->{'removeprefix'})) {
438 $formatted_node =~ s/^$self->{'removeprefix'}//;
439 # check that it's not now empty
440 if (!$formatted_node) {next;}
441 }
442 if (defined($self->{'removesuffix'}) &&
443 length($self->{'removesuffix'})) {
444 $formatted_node =~ s/$self->{'removesuffix'}$//;
445 # check that it's not now empty
446 if (!$formatted_node) {next;}
447 }
448
449 $formatted_node = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $formatted_node) unless $self->{'no_metadata_formatting'};
450
451 # In case our formatted string is empty...
452 if (! defined($formatted_node)) {
453 print $outhandle "Warning: AZCompactList: metavalue is ";
454 print $outhandle "empty\n";
455 $formatted_node="";
456 }
457
458 # use the lower case, for speed of lookup.
459 my $meta_lc=lc($metavalue);
460 $self->{'classifiers'}->{$meta_lc}
461 = { 'classifyobj' => $listclassobj,
462 'formattednode' => $formatted_node };
463 }
464 }
465 }
466
467
468 return (\@single_classlist,\@multiple_classlist);
469}
470
471
472sub reclassify
473{
474 my ($self,$multiple_cl_ref) = @_;
475
476 # Entries in the current classify list that are "book nodes"
477 # should be recursively classified.
478 #--
479 foreach my $dm_pair (@$multiple_cl_ref)
480 {
481 my ($doc_OID,$mdoffset,$metavalue,$cs_metavalue) = @$dm_pair;
482 my $listclassobj;
483
484 # find metavalue in list of sub-classifiers
485 # check if we have a key (lower case) for this metadata value
486 my $node_name=lc($metavalue);
487 if (exists $self->{'classifiers'}->{$node_name})
488 {
489 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
490
491 # record the metadata value offset temporarily, so eg AZList can
492 # get the correct metadata value (for multi-valued metadata fields)
493 $doc_obj->{'mdoffset'}=$mdoffset;
494
495 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
496 {
497 my $section=$1;
498 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
499 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
500 ->classify($doc_obj,"add", "Section=$section");
501 } else { # section level
502 # Thanks to Don Gourley for this...
503 # classify can't handle multi-level section
504 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
505 ->classify_section($section, $doc_obj, "add", $sortmeta);
506 }
507 }
508 else
509 {
510 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
511 ->classify($doc_obj,"add");
512 }
513 } else { # this key is not in the hash
514 my $outhandle=$self->{outhandle};
515 print $outhandle "Warning: AZCompactList::reclassify ";
516 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
517 }
518 }
519}
520
521
522
523sub get_reclassify_info
524{
525 my $self = shift (@_);
526
527 my $node_name;
528 foreach $node_name (keys %{$self->{'classifiers'}})
529 {
530 my $classifyinfo
531 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
532 ->get_classify_info();
533 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
534 = $classifyinfo;
535 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
536 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
537 }
538}
539
540
541sub alpha_numeric_cmp
542{
543 my ($self,$a,$b) = @_;
544
545 my $title_a = $self->{'reclassifylist'}->{$a};
546 my $title_b = $self->{'reclassifylist'}->{$b};
547
548 if ($title_a =~ m/^(\d+(\.\d+)?)/)
549 {
550 my $val_a = $1;
551 if ($title_b =~ m/^(\d+(\.\d+)?)/)
552 {
553 my $val_b = $1;
554 if ($val_a != $val_b)
555 {
556 return ($val_a <=> $val_b);
557 }
558 }
559 }
560
561 return ($title_a cmp $title_b);
562}
563
564sub frequency_cmp
565{
566 my ($self,$a,$b) = @_;
567
568
569 my $title_a = $self->{'reclassifylist'}->{$a};
570 my $title_b = $self->{'reclassifylist'}->{$b};
571
572 my $a_freq = 1;
573 my $b_freq = 1;
574
575 if ($a =~ m/^CLASSIFY\.(.*)$/)
576 {
577 my $a_node = $1;
578 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
579 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
580 }
581
582 if ($b =~ m/^CLASSIFY\.(.*)$/)
583 {
584 my $b_node = $1;
585 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
586 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
587 }
588
589 return $b_freq <=> $a_freq;
590}
591
592sub get_classify_info {
593 my $self = shift (@_);
594
595 my @classlist =keys %{$self->{'list_mvpair'}}; # list all doc oids
596
597 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
598 $self->reclassify($multiple_cl_ref);
599 $self->get_reclassify_info();
600
601 my @reclassified_classlist;
602 if ($self->{'freqsort'})
603 {
604 @reclassified_classlist
605 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
606 # supress sub-grouping by alphabet
607 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
608 }
609 else
610 {
611# @reclassified_classlist
612# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
613
614 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
615
616 @reclassified_classlist
617 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
618
619 }
620
621 return $self->splitlist (\@reclassified_classlist);
622}
623
624sub get_entry {
625 my $self = shift (@_);
626 my ($title, $childtype, $metaname, $thistype) = @_;
627 # organise into classification structure
628 my %classifyinfo = ('childtype'=>$childtype,
629 'Title'=>$title,
630 'contains'=>[],
631 'mdtype'=>$metaname);
632
633 $classifyinfo{'thistype'} = $thistype
634 if defined $thistype && $thistype =~ /\w/;
635
636 return \%classifyinfo;
637}
638
639
640
641# splitlist takes an ordered list of classifications (@$classlistref) and
642# splits it up into alphabetical sub-sections.
643sub splitlist {
644 my $self = shift (@_);
645 my ($classlistref) = @_;
646 my $classhash = {};
647
648 # top level
649 my @metanames = split(/\/|\|/,$self->{'metadata'});
650 my $metaname = shift(@metanames);
651
652 my $childtype = "HList";
653 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
654
655 my $title = $self->{'buttonname'}; # should always be defined by now.
656 my $classifyinfo;
657 if (!defined($self->{'recopt'}))
658 {
659 $classifyinfo
660 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
661 }
662 else
663 {
664 $classifyinfo
665 = $self->get_entry ($title, $childtype, $metaname, "VList");
666 }
667
668 # don't need to do any splitting if there are less than 'minnesting' classifications
669 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
670 foreach my $subOID (@$classlistref) {
671 if ($subOID =~ /^CLASSIFY\.(.*)$/
672 && defined $self->{'classifiers'}->{$1})
673 {
674 push (@{$classifyinfo->{'contains'}},
675 $self->{'classifiers'}->{$1}->{'classifyinfo'});
676 }
677 else
678 {
679 $subOID =~ s/^Metavalue_(\d+)\.//;
680 my $metaname_offset = $1 -1;
681 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
682 push (@{$classifyinfo->{'contains'}}, $oid_rec);
683 }
684 }
685 return $classifyinfo;
686 }
687
688 # first split up the list into separate A-Z and 0-9 classifications
689 foreach my $classification (@$classlistref) {
690 my $title = $self->{'reclassifylist'}->{$classification};
691 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
692 $title =~ s/^\W+//g; # remove leading non-word chars
693
694 # only want first character for classification
695 $title =~ m/^(.)/; # always a char, or first byte of wide char?
696 if (defined($1) && $1 ne "") {
697 $title=$1;
698 } else {
699 print STDERR "no first character found for \"$title\" - \"" .
700 $self->{'reclassifylist'}->{$classification} . "\"\n";
701 }
702 $title =~ tr/[a-z]/[A-Z]/;
703
704 if ($title =~ /^[0-9]$/) {$title = '0-9';}
705 elsif ($title !~ /^[A-Z]$/) {
706 my $outhandle = $self->{'outhandle'};
707 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
708 }
709 $classhash->{$title} = [] unless defined $classhash->{$title};
710 push (@{$classhash->{$title}}, $classification);
711 }
712 $classhash = $self->compactlist ($classhash);
713
714 my @tmparr = ();
715 foreach my $subsection (sort keys (%$classhash)) {
716 push (@tmparr, $subsection);
717 }
718
719 # if there's a 0-9 section it will have been sorted to the beginning
720 # but we want it at the end
721 if ($tmparr[0] eq '0-9') {
722 shift @tmparr;
723 push (@tmparr, '0-9');
724 }
725 foreach my $subclass (@tmparr)
726 {
727 my $tempclassify
728 = (scalar(@tmparr)==1)
729 ? ($self->get_entry(" ", "VList", $metaname))
730 : ($self->get_entry($subclass, "VList", $metaname));
731
732
733 foreach my $subsubOID (@{$classhash->{$subclass}})
734 {
735 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
736 && defined $self->{'classifiers'}->{$1})
737 {
738 # this is a "bookshelf" node... >1 entry compacted
739 push (@{$tempclassify->{'contains'}},
740 $self->{'classifiers'}->{$1}->{'classifyinfo'});
741 # set the metadata field name, for mdoffset to work
742 $self->{'classifiers'}->{$1}->{'classifyinfo'}->{'mdtype'}=
743 $metaname;
744 }
745 else
746 {
747 $subsubOID =~ s/^Metavalue_(\d+)\.//;
748 # record the offset if this metadata type has multiple values
749 my $metaname_offset = $1 -1;
750 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
751 push (@{$tempclassify->{'contains'}}, $oid_rec);
752 }
753 }
754 push (@{$classifyinfo->{'contains'}}, $tempclassify);
755 }
756
757 return $classifyinfo;
758}
759
760sub compactlist {
761 my $self = shift (@_);
762 my ($classhashref) = @_;
763 my $compactedhash = {};
764 my @currentOIDs = ();
765 my $currentfirstletter = "";
766 my $currentlastletter = "";
767 my $lastkey = "";
768
769 # minimum and maximum documents to be displayed per page.
770 # the actual maximum will be max + (min-1).
771 # the smallest sub-section is a single letter at present
772 # so in this case there may be many times max documents
773 # displayed on a page.
774 my $min = $self->{'mincompact'};
775 my $max = $self->{'maxcompact'};
776
777 foreach my $subsection (sort keys %$classhashref) {
778 if ($subsection eq '0-9') {
779 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
780 next;
781 }
782 $currentfirstletter = $subsection if $currentfirstletter eq "";
783 if ((scalar (@currentOIDs) < $min) ||
784 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
785 push (@currentOIDs, @{$classhashref->{$subsection}});
786 $currentlastletter = $subsection;
787 } else {
788
789 if ($currentfirstletter eq $currentlastletter) {
790 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
791 $lastkey = $currentfirstletter;
792 } else {
793 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
794 $lastkey = "$currentfirstletter-$currentlastletter";
795 }
796 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
797 $compactedhash->{$subsection} = $classhashref->{$subsection};
798 @currentOIDs = ();
799 $currentfirstletter = "";
800 $lastkey=$subsection;
801 } else {
802 @currentOIDs = @{$classhashref->{$subsection}};
803 $currentfirstletter = $subsection;
804 $currentlastletter = $subsection;
805 }
806 }
807 }
808
809 # add final OIDs to last sub-classification if there aren't many otherwise
810 # add final sub-classification
811
812 # don't add if there aren't any oids
813 if (! scalar (@currentOIDs)) {return $compactedhash;}
814
815 if (scalar (@currentOIDs) < $min) {
816 my ($newkey) = $lastkey =~ /^(.)/;
817 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
818 delete $compactedhash->{$lastkey};
819 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
820 } else {
821 if ($currentfirstletter eq $currentlastletter) {
822 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
823 } else {
824 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
825 }
826 }
827
828 return $compactedhash;
829}
830
8311;
832
833
Note: See TracBrowser for help on using the repository browser.