source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 10113

Last change on this file since 10113 was 10113, checked in by davidb, 19 years ago

'removesuffix' and 'removeprefix' modified so also passed down recursive call
to AZCompactList/AZList ...

  • Property svn:keywords set to Author Date Id Revision
File size: 24.4 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25# classifier plugin for sorting alphabetically
26# options are:
27
28package AZCompactList;
29
30use BasClas;
31use sorttools;
32
33sub BEGIN {
34 @ISA = ('BasClas');
35}
36
37my $doclevel_list =
38 [ { 'name' => "top",
39 'desc' => "{AZCompactList.doclevel.top}" },
40 { 'name' => "section",
41 'desc' => "{AZCompactList.doclevel.section}" } ];
42
43my $arguments =
44 [ { 'name' => "metadata",
45 'desc' => "{AZCompactList.metadata}",
46 'type' => "metadata",
47 'reqd' => "yes" },
48 { 'name' => "firstvalueonly",
49 'desc' => "{AZCompactList.firstvalueonly}",
50 'type' => "flag",
51 'reqd' => "no" },
52 { 'name' => "allvalues",
53 'desc' => "{AZCompactList.allvalues}",
54 'type' => "flag",
55 'reqd' => "no" },
56 { 'name' => "buttonname",
57 'desc' => "{BasClas.buttonname}",
58 'type' => "string",
59 'deft' => "",
60 'reqd' => "no" },
61 { 'name' => "sort",
62 'desc' => "{AZCompactList.sort}",
63 'type' => "string",
64 'deft' => "Title",
65 'reqd' => "no" },
66 { 'name' => "removeprefix",
67 'desc' => "{BasClas.removeprefix}",
68 'type' => "regexp",
69 'deft' => "",
70 'reqd' => "no" },
71 { 'name' => "removesuffix",
72 'desc' => "{BasClas.removesuffix}",
73 'type' => "regexp",
74 'deft' => "",
75 'reqd' => "no" },
76 { 'name' => "mingroup",
77 'desc' => "{AZCompactList.mingroup}",
78 'type' => "int",
79 'deft' => "2",
80 'range' => "1,",
81 'reqd' => "no" },
82 { 'name' => "minnesting",
83 'desc' => "{AZCompactList.minnesting}",
84 'type' => "int",
85 'deft' => "20",
86 'range' => "2,",
87 'reqd' => "no" },
88 { 'name' => "mincompact",
89 'desc' => "{AZCompactList.mincompact}",
90 'type' => "int",
91 'deft' => "10",
92 'range' => "1,",
93 'reqd' => "no" },
94 { 'name' => "maxcompact",
95 'desc' => "{AZCompactList.maxcompact}",
96 'type' => "int",
97 'deft' => "30",
98 'range' => "1,",
99 'reqd' => "no" },
100 { 'name' => "doclevel",
101 'desc' => "{AZCompactList.doclevel}",
102 'type' => "enum",
103 'list' => $doclevel_list,
104 'deft' => "top",
105 'reqd' => "no" },
106 { 'name' => "freqsort",
107 'desc' => "{AZCompactList.freqsort}",
108 'type' => "flag"},
109 { 'name' => "recopt",
110 'desc' => "{AZCompactList.recopt}",
111 'type' => "flag",
112 'reqd' => "no" } ];
113
114my $options =
115{ 'name' => "AZCompactList",
116 'desc' => "{AZCompactList.desc}",
117 'abstract' => "no",
118 'inherits' => "yes",
119 'args' => $arguments };
120
121
122sub new {
123 my $class = shift (@_);
124 my $self = new BasClas($class, @_);
125
126 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
127 my $option_list = $self->{'option_list'};
128 push( @{$option_list}, $options );
129
130 if ($self->{'info_only'}) {
131 # created from classinfo.pl - don't need to parse the arguments
132 return bless $self, $class;
133 }
134
135 my ($metaname, $title, $removeprefix, $removesuffix);
136 my $sortname = "Title";
137 my $mingroup = 2;
138 my $minnesting = 20;
139 my $mincompact = 10;
140 my $maxcompact = 30;
141 my $doclevel = "top";
142 my $firstvalueonly = 0;
143 my $allvalues = 0;
144 my $freqsort = 0;
145 my $recopt = undef;
146
147 if (!parsargv::parse(\@_,
148 q^metadata/.*/^, \$metaname,
149 q^buttonname/.*/^, \$title,
150 q^sort/.*/^, \$sortname,
151 q^removeprefix/.*/^, \$removeprefix,
152 q^removesuffix/.*/^, \$removesuffix,
153 q^mingroup/.*/2^, \$mingroup,
154 q^minnesting/.*/20^, \$minnesting,
155 q^mincompact/.*/10^, \$mincompact,
156 q^maxcompact/.*/30^, \$maxcompact,
157 q^doclevel/.*/top^, \$doclevel,
158 q^firstvalueonly^, \$firstvalueonly,
159 q^allvalues^, \$allvalues,
160 q^freqsort/.*/0^, \$freqsort,
161 q^recopt/.*/-1^, \$recopt, # Used in nested metadata such as -metadata Year/Organisation
162
163 "allow_extra_options")) {
164
165 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
166 $self->print_txt_usage(""); # Use default resource bundle
167 die "\n";
168 }
169
170 if (!defined($metaname)) {
171 my $outhandle = $self->{'outhandle'};
172 print $outhandle "AZCompactList Error: required option -metadata not supplied\n";
173 $self->print_txt_usage("");
174 die "AZCompactList Error: required option -metadata not supplied\n";
175 }
176
177 $title = $self->generate_title_from_metadata($metaname) unless ($title);
178 #$title = $metaname unless ($title);
179
180 $self->{'list'} = {};
181 $self->{'listmetavalue'} = {};
182 $self->{'list_mvpair'} = {};
183 $self->{'reclassify'} = {};
184 $self->{'reclassifylist'} = {};
185 $self->{'metaname'} = $metaname;
186 $self->{'title'} = "$title"; # title for the titlebar.
187 $self->{'sortname'} = $sortname;
188
189 if (defined($removeprefix) && $removeprefix) {
190 $removeprefix =~ s/^\^//; # don't need a leading ^
191 $self->{'removeprefix'} = $removeprefix;
192 }
193 if (defined($removesuffix) && $removesuffix) {
194 $removesuffix =~ s/\$$//; # don't need a trailing $
195 $self->{'removesuffix'} = $removesuffix;
196 }
197 $self->{'mingroup'} = $mingroup;
198 $self->{'minnesting'} = $minnesting;
199 $self->{'mincompact'} = $mincompact;
200 $self->{'maxcompact'} = $maxcompact;
201 $self->{'doclevel'} = $doclevel;
202
203 if ($firstvalueonly != 0) {
204 $firstvalueonly = 1;
205 }
206 $self->{'firstvalueonly'} = $firstvalueonly;
207
208 if ($allvalues != 0) {
209 $allvalues = 1;
210 }
211 $self->{'allvalues'} = $allvalues;
212 if ($freqsort != 0) {
213 $freqsort = 1;
214 }
215 $self->{'freqsort'} = $freqsort;
216
217 if ($recopt == -1) {
218 $recopt = undef;
219 } else {
220 $recopt = "on";
221 }
222 $self->{'recopt'} = $recopt;
223
224 return bless $self, $class;
225}
226
227sub init
228{
229 my $self = shift (@_);
230
231 $self->{'list'} = {};
232 $self->{'listmetavalue'} = {};
233 $self->{'list_mvpair'} = {};
234 $self->{'reclassify'} = {};
235 $self->{'reclassifylist'} = {};
236}
237
238$tmp = 0;
239
240sub classify
241{
242 my $self = shift (@_);
243 my ($doc_obj) = @_;
244
245 my $doc_OID = $doc_obj->get_OID();
246
247 my @sectionlist = ();
248 my $topsection = $doc_obj->get_top_section();
249 my $metaname = $self->{'metaname'};
250 my $outhandle = $self->{'outhandle'};
251
252 $metaname =~ s/(\/|\|).*//; # grab first name in n1/n2/n3 or n1|n2|n3 list
253 my @commameta_list = split(/,/, $metaname);
254
255 if ($self->{'doclevel'} =~ /^top(level)?/i)
256 {
257 push(@sectionlist,$topsection);
258 }
259 else
260 {
261 my $thissection = $doc_obj->get_next_section($topsection);
262 while (defined $thissection)
263 {
264 push(@sectionlist,$thissection);
265 $thissection = $doc_obj->get_next_section ($thissection);
266 }
267 }
268
269 my $thissection;
270 foreach $thissection (@sectionlist)
271 {
272 my $full_doc_OID
273 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
274 if (defined $self->{'list_mvpair'}->{$full_doc_OID})
275 {
276 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
277 }
278 $self->{'list'}->{$full_doc_OID} = [];
279 $self->{'listmetavalue'}->{$full_doc_OID} = [];
280 $self->{'list_mvpair'}->{$full_doc_OID} = [];
281
282 my $metavalues = [];
283 foreach my $cmn (@commameta_list) {
284 my $cmvalues = $doc_obj->get_metadata($thissection,$cmn);
285 push(@$metavalues,@$cmvalues) if (@{$cmvalues});
286 last if (@{$cmvalues} && !$self->{'allvalues'});
287 }
288
289 my $metavalue;
290 foreach $metavalue (@$metavalues)
291 {
292 # Tidy up use of white space in metavalue for classifying
293 $metavalue =~ s/^\s*//s;
294 $metavalue =~ s/\s*$//s;
295 $metavalue =~ s/\n/ /s;
296 $metavalue =~ s/\s{2,}/ /s;
297
298 # if this document doesn't contain the metadata element we're
299 # sorting by we won't include it in this classification
300 if (defined $metavalue && $metavalue =~ /\w/)
301 {
302 if (defined($self->{'removeprefix'}) &&
303 length($self->{'removeprefix'})) {
304 $metavalue =~ s/^$self->{'removeprefix'}//;
305
306 # check that it's not now empty
307 if (!$metavalue) {next;}
308 }
309
310 if (defined($self->{'removesuffix'}) &&
311 length($self->{'removesuffix'})) {
312 $metavalue =~ s/$self->{'removesuffix'}$//;
313
314 # check that it's not now empty
315 if (!$metavalue) {next;}
316 }
317
318 my $formatted_metavalue =
319 sorttools::format_metadata_for_sorting($self->{'metaname'},
320 $metavalue, $doc_obj);
321
322 #### prefix-str
323 if (! defined($formatted_metavalue)) {
324 print $outhandle "Warning: AZCompactList: metavalue is ";
325 print $outhandle "empty\n";
326 $formatted_metavalue="";
327 }
328
329 my $mv_pair = { 'mv' => $metavalue, 'fmv' => $formatted_metavalue };
330 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
331 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
332 push(@{$self->{'list_mvpair'}->{$full_doc_OID}},$mv_pair);
333
334
335 last if ($self->{'firstvalueonly'});
336 }
337 }
338
339 # This is used in reclassify below for AZCompactSectionList
340 my $sortmeta = $doc_obj->get_metadata_element($thissection, $self->{'sortname'});
341 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$sortmeta];
342 }
343}
344
345sub reinit
346{
347 my ($self,$classlist_ref) = @_;
348 my $outhandle = $self->{'outhandle'};
349
350 my %mtfreq = ();
351 my @single_classlist = ();
352 my @multiple_classlist = ();
353
354 # find out how often each metavalue occurs
355 map
356 {
357 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}} )
358 {
359### print STDERR "*** plain mv = $mvp->{'mv'}\n";
360### print STDERR "*** format mv = $mvp->{'fmv'}\n";
361
362 my $metavalue = $mvp->{'mv'};
363 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
364 $mtfreq{$metavalue}++;
365 }
366 } @$classlist_ref;
367
368 # use this information to split the list: single metavalue/repeated value
369 map
370 {
371 my $i = 1;
372 my $metavalue;
373 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}})
374 {
375 my $metavalue = $mvp->{'mv'};
376 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
377 my $cs_metavalue = $mvp->{'mv'}; # case sensative
378 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
379 {
380### print STDERR "*** pushing on $cs_metavalue\n";
381 push(@multiple_classlist,[$_,$i,$metavalue]);
382 }
383 else
384 {
385 push(@single_classlist,[$_,$cs_metavalue]);
386 $metavalue =~ tr/[A-Z]/[a-z]/;
387 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
388 }
389 $i++;
390 }
391 } @$classlist_ref;
392
393
394 # Setup sub-classifiers for multiple list
395
396 $self->{'classifiers'} = {};
397
398 my $pm;
399 foreach $pm ("List", "SectionList")
400 {
401 my $listname
402 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
403 if (-e $listname) { require $listname; }
404 else
405 {
406 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
407 die "\n";
408 }
409 }
410
411 # Create classifiers objects for each entry >= mingroup
412 my $metavalue;
413 my $doclevel = $self->{'doclevel'};
414 my $mingroup = $self->{'mingroup'};
415 my @metaname_list = split(/\/|\|/,$self->{'metaname'});
416 my $metaname = shift(@metaname_list);
417 my $hierarchical = 0;
418 if (scalar(@metaname_list) > 1) {
419 $hierarchical = 1;
420 $metaname = join('/',@metaname_list);
421 }
422 foreach $metavalue (keys %mtfreq)
423 {
424 if ($mtfreq{$metavalue} >= $mingroup)
425 {
426 # occurs more often than minimum required to compact into a group
427 my $listclassobj;
428
429 if (!$hierarchical)
430 {
431 my @args;
432 push @args, ("-metadata", "$metaname");
433 # buttonname is also used for the node's title
434 push @args, ("-buttonname", "$metavalue");
435 push @args, ("-sort", $self->{'sortname'});
436
437 if ($doclevel =~ m/^top(level)?/i)
438 {
439 eval ("\$listclassobj = new List(\@args)");
440 warn $@ if $@;
441 }
442 else
443 {
444 eval ("\$listclassobj = new SectionList(\@args)");
445 warn $@ if $@;
446 }
447 }
448 else
449 {
450
451 my @args;
452 push @args, ("-metadata", "$metaname");
453 # buttonname is also used for the node's title
454 push @args, ("-buttonname", "$metavalue");
455 push @args, ("-sort", $self->{'sortname'});
456
457 if (defined $self->{'removeprefix'}) {
458 push @args, ("-removeprefix", $self->{'removeprefix'});
459 }
460 if (defined $self->{'removesuffix'}) {
461 push @args, ("-removesuffix", $self->{'removesuffix'});
462 }
463
464 push @args, ("-doclevel", "$doclevel");
465 push @args, ("-mingroup", $mingroup);
466 push @args, "-recopt ";
467
468 eval ("\$listclassobj = new AZCompactList(\@args)");
469 warn $@ if $@;
470 }
471 if ($@) {
472 print $outhandle "$@";
473 die "\n";
474 }
475
476 $listclassobj->init();
477
478 if (defined $metavalue && $metavalue =~ /\w/)
479 {
480 my $formatted_node = $metavalue;
481
482 if (defined($self->{'removeprefix'}) &&
483 length($self->{'removeprefix'})) {
484 $formatted_node =~ s/^$self->{'removeprefix'}//;
485 # check that it's not now empty
486 if (!$formatted_node) {next;}
487 }
488 if (defined($self->{'removesuffix'}) &&
489 length($self->{'removesuffix'})) {
490 $formatted_node =~ s/$self->{'removesuffix'}$//;
491 # check that it's not now empty
492 if (!$formatted_node) {next;}
493 }
494
495 $formatted_node = &sorttools::format_metadata_for_sorting($self->{'metaname'}, $formatted_node);
496
497 # In case our formatted string is empty...
498 if (! defined($formatted_node)) {
499 print $outhandle "Warning: AZCompactList: metavalue is ";
500 print $outhandle "empty\n";
501 $formatted_node="";
502 }
503
504 # use the lower case, for speed of lookup.
505 my $meta_lc=lc($metavalue);
506 $self->{'classifiers'}->{$meta_lc}
507 = { 'classifyobj' => $listclassobj,
508 'formattednode' => $formatted_node };
509 }
510 }
511 }
512
513
514 return (\@single_classlist,\@multiple_classlist);
515}
516
517
518sub reclassify
519{
520 my ($self,$multiple_cl_ref) = @_;
521
522 # Entries in the current classify list that are "book nodes"
523 # should be recursively classified.
524 #--
525 foreach $dm_pair (@$multiple_cl_ref)
526 {
527 my ($doc_OID,$mdoffset,$metavalue,$cs_metavalue) = @$dm_pair;
528 my $listclassobj;
529
530 # find metavalue in list of sub-classifiers
531 # check if we have a key (lower case) for this metadata value
532 my $node_name=lc($metavalue);
533 if (exists $self->{'classifiers'}->{$node_name})
534 {
535 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
536
537 # record the metadata value offset temporarily, so eg AZList can
538 # get the correct metadata value (for multi-valued metadata fields)
539 $doc_obj->{'mdoffset'}=$mdoffset;
540
541 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
542 {
543 my $section=$1;
544 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
545 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
546 ->classify($doc_obj, "Section=$section");
547 } else { # section level
548 # Thanks to Don Gourley for this...
549 # classify can't handle multi-level section
550 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
551 ->classify_section($section, $doc_obj, $sortmeta);
552 }
553 }
554 else
555 {
556 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
557 ->classify($doc_obj);
558 }
559 } else { # this key is not in the hash
560 my $outhandle=$self->{outhandle};
561 print $outhandle "Warning: AZCompactList::reclassify ";
562 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
563 }
564 }
565}
566
567
568
569sub get_reclassify_info
570{
571 my $self = shift (@_);
572
573 my $node_name;
574 foreach $node_name (keys %{$self->{'classifiers'}})
575 {
576 my $classifyinfo
577 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
578 ->get_classify_info();
579 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
580 = $classifyinfo;
581 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
582 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
583 }
584}
585
586
587sub alpha_numeric_cmp
588{
589 my ($self,$a,$b) = @_;
590
591 my $title_a = $self->{'reclassifylist'}->{$a};
592 my $title_b = $self->{'reclassifylist'}->{$b};
593
594 if ($title_a =~ m/^(\d+(\.\d+)?)/)
595 {
596 my $val_a = $1;
597 if ($title_b =~ m/^(\d+(\.\d+)?)/)
598 {
599 my $val_b = $1;
600 if ($val_a != $val_b)
601 {
602 return ($val_a <=> $val_b);
603 }
604 }
605 }
606
607 return ($title_a cmp $title_b);
608}
609
610sub frequency_cmp
611{
612 my ($self,$a,$b) = @_;
613
614
615 my $title_a = $self->{'reclassifylist'}->{$a};
616 my $title_b = $self->{'reclassifylist'}->{$b};
617
618 my $a_freq = 1;
619 my $b_freq = 1;
620
621 if ($a =~ m/^CLASSIFY\.(.*)$/)
622 {
623 my $a_node = $1;
624 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
625 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
626 }
627
628 if ($b =~ m/^CLASSIFY\.(.*)$/)
629 {
630 my $b_node = $1;
631 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
632 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
633 }
634
635 return $b_freq <=> $a_freq;
636}
637
638sub get_classify_info {
639 my $self = shift (@_);
640
641 my @classlist =keys %{$self->{'list_mvpair'}}; # list all doc oids
642
643 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
644 $self->reclassify($multiple_cl_ref);
645 $self->get_reclassify_info();
646
647 my @reclassified_classlist;
648 if ($self->{'freqsort'})
649 {
650 @reclassified_classlist
651 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
652 # supress sub-grouping by alphabet
653 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
654 }
655 else
656 {
657# @reclassified_classlist
658# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
659
660 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
661
662 @reclassified_classlist
663 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
664
665 }
666
667 return $self->splitlist (\@reclassified_classlist);
668}
669
670sub get_entry {
671 my $self = shift (@_);
672 my ($title, $childtype, $metaname, $thistype) = @_;
673
674 # organise into classification structure
675 my %classifyinfo = ('childtype'=>$childtype,
676 'Title'=>$title,
677 'contains'=>[],
678 'mdtype'=>$metaname);
679
680 $classifyinfo{'thistype'} = $thistype
681 if defined $thistype && $thistype =~ /\w/;
682
683 return \%classifyinfo;
684}
685
686
687
688# splitlist takes an ordered list of classifications (@$classlistref) and
689# splits it up into alphabetical sub-sections.
690sub splitlist {
691 my $self = shift (@_);
692 my ($classlistref) = @_;
693 my $classhash = {};
694
695 # top level
696 my @metanames = split(/\/|\|/,$self->{'metaname'});
697 my $metaname = shift(@metanames);
698
699 my $childtype = "HList";
700 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
701
702 my $classifyinfo;
703 if (!defined($self->{'recopt'}))
704 {
705 my $title = $self->{'title'}; # should always be defined by now....
706 $title = $metaname unless defined $title;
707 $classifyinfo
708 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
709 }
710 else
711 {
712 my $title = $self->{'title'};
713 $classifyinfo
714 = $self->get_entry ($title, $childtype, $metaname, "VList");
715 }
716
717 # don't need to do any splitting if there are less than 'minnesting' classifications
718 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
719 foreach $subOID (@$classlistref) {
720 if ($subOID =~ /^CLASSIFY\.(.*)$/
721 && defined $self->{'classifiers'}->{$1})
722 {
723 push (@{$classifyinfo->{'contains'}},
724 $self->{'classifiers'}->{$1}->{'classifyinfo'});
725 }
726 else
727 {
728 $subOID =~ s/^Metavalue_(\d+)\.//;
729 my $metaname_offset = $1 -1;
730 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
731 push (@{$classifyinfo->{'contains'}}, $oid_rec);
732 }
733 }
734 return $classifyinfo;
735 }
736
737 # first split up the list into separate A-Z and 0-9 classifications
738 foreach $classification (@$classlistref) {
739 my $title = $self->{'reclassifylist'}->{$classification};
740 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
741 $title =~ s/^\W+//g; # remove leading non-word chars
742
743 # only want first character for classification
744 $title =~ m/^(.)/; # always a char, or first byte of wide char?
745 if (defined($1) && $1 ne "") {
746 $title=$1;
747 } else {
748 print STDERR "no first character found for \"$title\" - \"" .
749 $self->{'reclassifylist'}->{$classification} . "\"\n";
750 }
751 $title =~ tr/[a-z]/[A-Z]/;
752
753 if ($title =~ /^[0-9]$/) {$title = '0-9';}
754 elsif ($title !~ /^[A-Z]$/) {
755 my $outhandle = $self->{'outhandle'};
756 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
757 }
758 $classhash->{$title} = [] unless defined $classhash->{$title};
759 push (@{$classhash->{$title}}, $classification);
760 }
761 $classhash = $self->compactlist ($classhash);
762
763 my @tmparr = ();
764 foreach $subsection (sort keys (%$classhash)) {
765 push (@tmparr, $subsection);
766 }
767
768 # if there's a 0-9 section it will have been sorted to the beginning
769 # but we want it at the end
770 if ($tmparr[0] eq '0-9') {
771 shift @tmparr;
772 push (@tmparr, '0-9');
773 }
774 foreach $subclass (@tmparr)
775 {
776 my $tempclassify
777 = (scalar(@tmparr)==1)
778 ? ($self->get_entry(" ", "VList", $metaname))
779 : ($self->get_entry($subclass, "VList", $metaname));
780
781
782 foreach $subsubOID (@{$classhash->{$subclass}})
783 {
784 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
785 && defined $self->{'classifiers'}->{$1})
786 {
787 # this is a "bookshelf" node... >1 entry compacted
788 push (@{$tempclassify->{'contains'}},
789 $self->{'classifiers'}->{$1}->{'classifyinfo'});
790 # set the metadata field name, for mdoffset to work
791 $self->{'classifiers'}->{$1}->{'classifyinfo'}->{'mdtype'}=
792 $metaname;
793 }
794 else
795 {
796 $subsubOID =~ s/^Metavalue_(\d+)\.//;
797 # record the offset if this metadata type has multiple values
798 my $metaname_offset = $1 -1;
799 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
800 push (@{$tempclassify->{'contains'}}, $oid_rec);
801 }
802 }
803 push (@{$classifyinfo->{'contains'}}, $tempclassify);
804 }
805
806 return $classifyinfo;
807}
808
809sub compactlist {
810 my $self = shift (@_);
811 my ($classhashref) = @_;
812 my $compactedhash = {};
813 my @currentOIDs = ();
814 my $currentfirstletter = "";
815 my $currentlastletter = "";
816 my $lastkey = "";
817
818 # minimum and maximum documents to be displayed per page.
819 # the actual maximum will be max + (min-1).
820 # the smallest sub-section is a single letter at present
821 # so in this case there may be many times max documents
822 # displayed on a page.
823 my $min = $self->{'mincompact'};
824 my $max = $self->{'maxcompact'};
825
826 foreach $subsection (sort keys %$classhashref) {
827 if ($subsection eq '0-9') {
828 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
829 next;
830 }
831 $currentfirstletter = $subsection if $currentfirstletter eq "";
832 if ((scalar (@currentOIDs) < $min) ||
833 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
834 push (@currentOIDs, @{$classhashref->{$subsection}});
835 $currentlastletter = $subsection;
836 } else {
837
838 if ($currentfirstletter eq $currentlastletter) {
839 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
840 $lastkey = $currentfirstletter;
841 } else {
842 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
843 $lastkey = "$currentfirstletter-$currentlastletter";
844 }
845 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
846 $compactedhash->{$subsection} = $classhashref->{$subsection};
847 @currentOIDs = ();
848 $currentfirstletter = "";
849 $lastkey=$subsection;
850 } else {
851 @currentOIDs = @{$classhashref->{$subsection}};
852 $currentfirstletter = $subsection;
853 $currentlastletter = $subsection;
854 }
855 }
856 }
857
858 # add final OIDs to last sub-classification if there aren't many otherwise
859 # add final sub-classification
860
861 # don't add if there aren't any oids
862 if (! scalar (@currentOIDs)) {return $compactedhash;}
863
864 if (scalar (@currentOIDs) < $min) {
865 my ($newkey) = $lastkey =~ /^(.)/;
866 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
867 delete $compactedhash->{$lastkey};
868 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
869 } else {
870 if ($currentfirstletter eq $currentlastletter) {
871 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
872 } else {
873 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
874 }
875 }
876
877 return $compactedhash;
878}
879
8801;
881
882
Note: See TracBrowser for help on using the repository browser.