source: main/trunk/greenstone2/perllib/classify/AZCompactList.pm@ 23116

Last change on this file since 23116 was 23116, checked in by kjdon, 14 years ago

for incremental build, classifiers are not really done incrementally. Previously, we reconstructed all the docs from the database, and classified them, then processed any new/edited/deleted docs, updating the classifier as necessary. Now, we process all new/updated docs, then reconstruct the docs from the database, but only classify those not changed/deleted. This means that we are only ever adding docs to a classifier, never updating or deleting. I have removed edit_mode and all code handling deleting stuff from the classifier.

  • Property svn:keywords set to Author Date Id Revision
File size: 23.2 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25# classifier plugin for sorting alphabetically
26
27package AZCompactList;
28
29use BaseClassifier;
30use sorttools;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35sub BEGIN {
36 @AZCompactList::ISA = ('BaseClassifier');
37}
38
39my $doclevel_list =
40 [ { 'name' => "top",
41 'desc' => "{AZCompactList.doclevel.top}" },
42 { 'name' => "section",
43 'desc' => "{AZCompactList.doclevel.section}" } ];
44
45my $arguments =
46 [ { 'name' => "metadata",
47 'desc' => "{AZCompactList.metadata}",
48 'type' => "metadata",
49 'reqd' => "yes" },
50 { 'name' => "firstvalueonly",
51 'desc' => "{AZCompactList.firstvalueonly}",
52 'type' => "flag",
53 'reqd' => "no" },
54 { 'name' => "allvalues",
55 'desc' => "{AZCompactList.allvalues}",
56 'type' => "flag",
57 'reqd' => "no" },
58 { 'name' => "sort",
59 'desc' => "{AZCompactList.sort}",
60 'type' => "metadata",
61# 'deft' => "Title",
62 'reqd' => "no" },
63 { 'name' => "removeprefix",
64 'desc' => "{BasClas.removeprefix}",
65 'type' => "regexp",
66 'deft' => "",
67 'reqd' => "no" },
68 { 'name' => "removesuffix",
69 'desc' => "{BasClas.removesuffix}",
70 'type' => "regexp",
71 'deft' => "",
72 'reqd' => "no" },
73 { 'name' => "mingroup",
74 'desc' => "{AZCompactList.mingroup}",
75 'type' => "int",
76 'deft' => "1",
77 'range' => "1,",
78 'reqd' => "no" },
79 { 'name' => "minnesting",
80 'desc' => "{AZCompactList.minnesting}",
81 'type' => "int",
82 'deft' => "20",
83 'range' => "2,",
84 'reqd' => "no" },
85 { 'name' => "mincompact",
86 'desc' => "{AZCompactList.mincompact}",
87 'type' => "int",
88 'deft' => "10",
89 'range' => "1,",
90 'reqd' => "no" },
91 { 'name' => "maxcompact",
92 'desc' => "{AZCompactList.maxcompact}",
93 'type' => "int",
94 'deft' => "30",
95 'range' => "1,",
96 'reqd' => "no" },
97 { 'name' => "doclevel",
98 'desc' => "{AZCompactList.doclevel}",
99 'type' => "enum",
100 'list' => $doclevel_list,
101 'deft' => "top",
102 'reqd' => "no" },
103 { 'name' => "freqsort",
104 'desc' => "{AZCompactList.freqsort}",
105 'type' => "flag"},
106 { 'name' => "recopt",
107 'desc' => "{AZCompactList.recopt}",
108 'type' => "flag",
109 'reqd' => "no" } ];
110
111my $options =
112{ 'name' => "AZCompactList",
113 'desc' => "{AZCompactList.desc}",
114 'abstract' => "no",
115 'inherits' => "yes",
116 'args' => $arguments };
117
118
119sub new {
120 my ($class) = shift (@_);
121 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
122 push(@$pluginlist, $class);
123
124 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
125 push(@{$hashArgOptLists->{"OptList"}},$options);
126
127 my $self = new BaseClassifier($pluginlist, $inputargs, $hashArgOptLists);
128
129 if ($self->{'info_only'}) {
130 # don't worry about any options etc
131 return bless $self, $class;
132 }
133
134 if (!$self->{"metadata"}) {
135 my $outhandle = $self->{'outhandle'};
136 print $outhandle "AZCompactList Error: required option -metadata not supplied\n";
137 $self->print_txt_usage("");
138 die "AZCompactList Error: required option -metadata not supplied\n";
139 }
140
141 $self->{'metadata'} = $self->strip_ex_from_metadata($self->{'metadata'});
142 # Manually set $self parameters.
143 $self->{'list'} = {};
144 $self->{'listmetavalue'} = {};
145 $self->{'list_mvpair'} = {};
146 $self->{'reclassify'} = {};
147 $self->{'reclassifylist'} = {};
148
149 $self->{'buttonname'} = $self->generate_title_from_metadata($self->{'metadata'}) unless ($self->{'buttonname'});
150
151 if (defined($self->{"removeprefix"}) && $self->{"removeprefix"}) {
152 $self->{"removeprefix"} =~ s/^\^//; # don't need a leading ^
153 }
154 if (defined($self->{"removesuffix"}) && $self->{"removesuffix"}) {
155 $self->{"removesuffix"} =~ s/\$$//; # don't need a trailing $
156 }
157
158 $self->{'recopt'} = ($self->{'recopt'} == 0) ? undef : "on";
159
160 if (defined $self->{'sort'}) {
161 $self->{'sort'} = $self->strip_ex_from_metadata($self->{'sort'});
162 }
163 # Clean out the unused keys
164 if($self->{"removeprefix"} eq "") {delete $self->{"removeprefix"};}
165 if($self->{"removesuffix"} eq "") {delete $self->{"removesuffix"};}
166
167 return bless $self, $class;
168}
169
170sub init
171{
172 my $self = shift (@_);
173
174 $self->{'list'} = {};
175 $self->{'listmetavalue'} = {};
176 $self->{'list_mvpair'} = {};
177 $self->{'reclassify'} = {};
178 $self->{'reclassifylist'} = {};
179}
180
181my $tmp = 0;
182
183sub classify
184{
185 my $self = shift (@_);
186 my ($doc_obj) = @_;
187
188 my $doc_OID = $doc_obj->get_OID();
189 my $outhandle = $self->{'outhandle'};
190
191 my @sectionlist = ();
192 my $topsection = $doc_obj->get_top_section();
193 my $metaname = $self->{'metadata'};
194
195 $metaname =~ s/(\/|\|).*//; # grab first name in n1/n2/n3 or n1|n2|n3 list
196 my @commameta_list = split(/,|;/, $metaname);
197
198 if ($self->{'doclevel'} =~ /^top(level)?/i)
199 {
200 push(@sectionlist,$topsection);
201 }
202 else
203 {
204 my $thissection = $doc_obj->get_next_section($topsection);
205 while (defined $thissection)
206 {
207 push(@sectionlist,$thissection);
208 $thissection = $doc_obj->get_next_section ($thissection);
209 }
210 }
211
212 my $thissection;
213 foreach $thissection (@sectionlist)
214 {
215 my $full_doc_OID
216 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
217
218 if (defined $self->{'list_mvpair'}->{$full_doc_OID})
219 {
220 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
221 }
222 $self->{'list'}->{$full_doc_OID} = [];
223 $self->{'listmetavalue'}->{$full_doc_OID} = [];
224 $self->{'list_mvpair'}->{$full_doc_OID} = [];
225
226 my $metavalues = [];
227 foreach my $cmn (@commameta_list) {
228 my $cmvalues = $doc_obj->get_metadata($thissection,$cmn);
229 push(@$metavalues,@$cmvalues) if (@{$cmvalues});
230 last if (@{$cmvalues} && !$self->{'allvalues'});
231 }
232
233 my $metavalue;
234 foreach $metavalue (@$metavalues)
235 {
236 # Tidy up use of white space in metavalue for classifying
237 $metavalue =~ s/^\s*//s;
238 $metavalue =~ s/\s*$//s;
239 $metavalue =~ s/\n/ /s;
240 $metavalue =~ s/\s{2,}/ /s;
241
242 # if this document doesn't contain the metadata element we're
243 # sorting by we won't include it in this classification
244 if (defined $metavalue && $metavalue =~ /\w/)
245 {
246 if (defined($self->{'removeprefix'}) &&
247 length($self->{'removeprefix'})) {
248 $metavalue =~ s/^$self->{'removeprefix'}//;
249
250 # check that it's not now empty
251 if (!$metavalue) {next;}
252 }
253
254 if (defined($self->{'removesuffix'}) &&
255 length($self->{'removesuffix'})) {
256 $metavalue =~ s/$self->{'removesuffix'}$//;
257
258 # check that it's not now empty
259 if (!$metavalue) {next;}
260 }
261
262 my $formatted_metavalue;
263 if ($self->{'no_metadata_formatting'}) {
264 $formatted_metavalue = $metavalue;
265 } else {
266 $formatted_metavalue = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $metavalue, $doc_obj);
267 }
268
269 #### prefix-str
270 if (! defined($formatted_metavalue)) {
271 print $outhandle "Warning: AZCompactList: metavalue is ";
272 print $outhandle "empty\n";
273 $formatted_metavalue="";
274 }
275
276 my $mv_pair = { 'mv' => $metavalue, 'fmv' => $formatted_metavalue };
277 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
278 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
279 push(@{$self->{'list_mvpair'}->{$full_doc_OID}},$mv_pair);
280
281
282 last if ($self->{'firstvalueonly'});
283 }
284 }
285
286 # This is used in reclassify below for AZCompactSectionList
287 my $sortmeta = $doc_obj->get_metadata_element($thissection, $self->{'sort'});
288 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$sortmeta];
289 }
290}
291
292sub reinit
293{
294 my ($self,$classlist_ref) = @_;
295 my $outhandle = $self->{'outhandle'};
296
297 my %mtfreq = ();
298 my @single_classlist = ();
299 my @multiple_classlist = ();
300
301 # find out how often each metavalue occurs
302 map
303 {
304 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}} )
305 {
306### print STDERR "*** plain mv = $mvp->{'mv'}\n";
307### print STDERR "*** format mv = $mvp->{'fmv'}\n";
308
309 my $metavalue = $mvp->{'mv'};
310 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
311 $mtfreq{$metavalue}++;
312 }
313 } @$classlist_ref;
314
315 # use this information to split the list: single metavalue/repeated value
316 map
317 {
318 my $i = 1;
319 my $metavalue;
320 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}})
321 {
322 my $metavalue = $mvp->{'mv'};
323 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
324 my $cs_metavalue = $mvp->{'mv'}; # case sensative
325 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
326 {
327### print STDERR "*** pushing on $cs_metavalue\n";
328 push(@multiple_classlist,[$_,$i,$metavalue]);
329 }
330 else
331 {
332 push(@single_classlist,[$_,$cs_metavalue]);
333 $metavalue =~ tr/[A-Z]/[a-z]/;
334 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
335 }
336 $i++;
337 }
338 } @$classlist_ref;
339
340
341 # Setup sub-classifiers for multiple list
342
343 $self->{'classifiers'} = {};
344
345 my $pm;
346 foreach $pm ("SimpleList", "SectionList")
347 {
348 my $listname
349 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
350 if (-e $listname) { require $listname; }
351 else
352 {
353 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
354 die "\n";
355 }
356 }
357
358 # Create classifiers objects for each entry >= mingroup
359 my $metavalue;
360 my $doclevel = $self->{'doclevel'};
361 my $mingroup = $self->{'mingroup'};
362 my @metaname_list = split(/\/|\|/,$self->{'metadata'});
363 my $metaname = shift(@metaname_list);
364 my $hierarchical = 0;
365 if (scalar(@metaname_list) > 1) {
366 $hierarchical = 1;
367 $metaname = join('/',@metaname_list);
368 }
369 foreach $metavalue (keys %mtfreq)
370 {
371 if ($mtfreq{$metavalue} >= $mingroup)
372 {
373 # occurs more often than minimum required to compact into a group
374 my $listclassobj;
375
376 if (!$hierarchical)
377 {
378 my @args;
379 push @args, ("-metadata", "$metaname");
380 # buttonname is also used for the node's title
381 push @args, ("-buttonname", "$metavalue");
382 push @args, ("-sort", $self->{'sort'});
383
384 my $ptArgs = \@args;
385 if ($doclevel =~ m/^top(level)?/i)
386 {
387 eval ("\$listclassobj = new SimpleList([],\$ptArgs)");
388 }
389 else
390 {
391 eval ("\$listclassobj = new SectionList([],\$ptArgs)");
392 }
393 }
394 else
395 {
396
397 my @args;
398 push @args, ("-metadata", "$metaname");
399 # buttonname is also used for the node's title
400 push @args, ("-buttonname", "$metavalue");
401 push @args, ("-sort", $self->{'sort'});
402
403 if (defined $self->{'removeprefix'}) {
404 push @args, ("-removeprefix", $self->{'removeprefix'});
405 }
406 if (defined $self->{'removesuffix'}) {
407 push @args, ("-removesuffix", $self->{'removesuffix'});
408 }
409
410 push @args, ("-doclevel", "$doclevel");
411 push @args, ("-mingroup", $mingroup);
412 push @args, "-recopt ";
413
414 my $ptArgs = \@args;
415 eval ("\$listclassobj = new AZCompactList([],\$ptArgs)");
416 }
417
418 if ($@) {
419 print $outhandle "$@";
420 die "\n";
421 }
422
423 $listclassobj->init();
424
425 if (defined $metavalue && $metavalue =~ /\w/)
426 {
427 my $formatted_node = $metavalue;
428
429 if (defined($self->{'removeprefix'}) &&
430 length($self->{'removeprefix'})) {
431 $formatted_node =~ s/^$self->{'removeprefix'}//;
432 # check that it's not now empty
433 if (!$formatted_node) {next;}
434 }
435 if (defined($self->{'removesuffix'}) &&
436 length($self->{'removesuffix'})) {
437 $formatted_node =~ s/$self->{'removesuffix'}$//;
438 # check that it's not now empty
439 if (!$formatted_node) {next;}
440 }
441
442 $formatted_node = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $formatted_node) unless $self->{'no_metadata_formatting'};
443
444 # In case our formatted string is empty...
445 if (! defined($formatted_node)) {
446 print $outhandle "Warning: AZCompactList: metavalue is ";
447 print $outhandle "empty\n";
448 $formatted_node="";
449 }
450
451 # use the lower case, for speed of lookup.
452 my $meta_lc=lc($metavalue);
453 $self->{'classifiers'}->{$meta_lc}
454 = { 'classifyobj' => $listclassobj,
455 'formattednode' => $formatted_node };
456 }
457 }
458 }
459
460
461 return (\@single_classlist,\@multiple_classlist);
462}
463
464
465sub reclassify
466{
467 my ($self,$multiple_cl_ref) = @_;
468
469 # Entries in the current classify list that are "book nodes"
470 # should be recursively classified.
471 #--
472 foreach my $dm_pair (@$multiple_cl_ref)
473 {
474 my ($doc_OID,$mdoffset,$metavalue,$cs_metavalue) = @$dm_pair;
475 my $listclassobj;
476
477 # find metavalue in list of sub-classifiers
478 # check if we have a key (lower case) for this metadata value
479 my $node_name=lc($metavalue);
480 if (exists $self->{'classifiers'}->{$node_name})
481 {
482 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
483
484 # record the metadata value offset temporarily, so eg AZList can
485 # get the correct metadata value (for multi-valued metadata fields)
486 $doc_obj->{'mdoffset'}=$mdoffset;
487
488 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
489 {
490 my $section=$1;
491 if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
492 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
493 ->classify($doc_obj,"add", "Section=$section");
494 } else { # section level
495 # Thanks to Don Gourley for this...
496 # classify can't handle multi-level section
497 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
498 ->classify_section($section, $doc_obj, "add", $sortmeta);
499 }
500 }
501 else
502 {
503 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
504 ->classify($doc_obj,"add");
505 }
506 } else { # this key is not in the hash
507 my $outhandle=$self->{outhandle};
508 print $outhandle "Warning: AZCompactList::reclassify ";
509 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
510 }
511 }
512}
513
514
515
516sub get_reclassify_info
517{
518 my $self = shift (@_);
519
520 my $node_name;
521 foreach $node_name (keys %{$self->{'classifiers'}})
522 {
523 my $classifyinfo
524 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
525 ->get_classify_info();
526 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
527 = $classifyinfo;
528 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
529 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
530 }
531}
532
533
534sub alpha_numeric_cmp
535{
536 my ($self,$a,$b) = @_;
537
538 my $title_a = $self->{'reclassifylist'}->{$a};
539 my $title_b = $self->{'reclassifylist'}->{$b};
540
541 if ($title_a =~ m/^(\d+(\.\d+)?)/)
542 {
543 my $val_a = $1;
544 if ($title_b =~ m/^(\d+(\.\d+)?)/)
545 {
546 my $val_b = $1;
547 if ($val_a != $val_b)
548 {
549 return ($val_a <=> $val_b);
550 }
551 }
552 }
553
554 return ($title_a cmp $title_b);
555}
556
557sub frequency_cmp
558{
559 my ($self,$a,$b) = @_;
560
561
562 my $title_a = $self->{'reclassifylist'}->{$a};
563 my $title_b = $self->{'reclassifylist'}->{$b};
564
565 my $a_freq = 1;
566 my $b_freq = 1;
567
568 if ($a =~ m/^CLASSIFY\.(.*)$/)
569 {
570 my $a_node = $1;
571 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
572 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
573 }
574
575 if ($b =~ m/^CLASSIFY\.(.*)$/)
576 {
577 my $b_node = $1;
578 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
579 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
580 }
581
582 return $b_freq <=> $a_freq;
583}
584
585sub get_classify_info {
586 my $self = shift (@_);
587
588 my @classlist =keys %{$self->{'list_mvpair'}}; # list all doc oids
589
590 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
591 $self->reclassify($multiple_cl_ref);
592 $self->get_reclassify_info();
593
594 my @reclassified_classlist;
595 if ($self->{'freqsort'})
596 {
597 @reclassified_classlist
598 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
599 # supress sub-grouping by alphabet
600 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
601 }
602 else
603 {
604# @reclassified_classlist
605# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
606
607 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
608
609 @reclassified_classlist
610 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
611
612 }
613
614 return $self->splitlist (\@reclassified_classlist);
615}
616
617sub get_entry {
618 my $self = shift (@_);
619 my ($title, $childtype, $metaname, $thistype) = @_;
620 # organise into classification structure
621 my %classifyinfo = ('childtype'=>$childtype,
622 'Title'=>$title,
623 'contains'=>[],
624 'mdtype'=>$metaname);
625
626 $classifyinfo{'thistype'} = $thistype
627 if defined $thistype && $thistype =~ /\w/;
628
629 return \%classifyinfo;
630}
631
632
633
634# splitlist takes an ordered list of classifications (@$classlistref) and
635# splits it up into alphabetical sub-sections.
636sub splitlist {
637 my $self = shift (@_);
638 my ($classlistref) = @_;
639 my $classhash = {};
640
641 # top level
642 my @metanames = split(/\/|\|/,$self->{'metadata'});
643 my $metaname = shift(@metanames);
644
645 my $childtype = "HList";
646 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
647
648 my $title = $self->{'buttonname'}; # should always be defined by now.
649 my $classifyinfo;
650 if (!defined($self->{'recopt'}))
651 {
652 $classifyinfo
653 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
654 }
655 else
656 {
657 $classifyinfo
658 = $self->get_entry ($title, $childtype, $metaname, "VList");
659 }
660
661 # don't need to do any splitting if there are less than 'minnesting' classifications
662 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
663 foreach my $subOID (@$classlistref) {
664 if ($subOID =~ /^CLASSIFY\.(.*)$/
665 && defined $self->{'classifiers'}->{$1})
666 {
667 push (@{$classifyinfo->{'contains'}},
668 $self->{'classifiers'}->{$1}->{'classifyinfo'});
669 }
670 else
671 {
672 $subOID =~ s/^Metavalue_(\d+)\.//;
673 my $metaname_offset = $1 -1;
674 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
675 push (@{$classifyinfo->{'contains'}}, $oid_rec);
676 }
677 }
678 return $classifyinfo;
679 }
680
681 # first split up the list into separate A-Z and 0-9 classifications
682 foreach my $classification (@$classlistref) {
683 my $title = $self->{'reclassifylist'}->{$classification};
684 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
685 $title =~ s/^\W+//g; # remove leading non-word chars
686
687 # only want first character for classification
688 $title =~ m/^(.)/; # always a char, or first byte of wide char?
689 if (defined($1) && $1 ne "") {
690 $title=$1;
691 } else {
692 print STDERR "no first character found for \"$title\" - \"" .
693 $self->{'reclassifylist'}->{$classification} . "\"\n";
694 }
695 $title =~ tr/[a-z]/[A-Z]/;
696
697 if ($title =~ /^[0-9]$/) {$title = '0-9';}
698 elsif ($title !~ /^[A-Z]$/) {
699 my $outhandle = $self->{'outhandle'};
700 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
701 }
702 $classhash->{$title} = [] unless defined $classhash->{$title};
703 push (@{$classhash->{$title}}, $classification);
704 }
705 $classhash = $self->compactlist ($classhash);
706
707 my @tmparr = ();
708 foreach my $subsection (sort keys (%$classhash)) {
709 push (@tmparr, $subsection);
710 }
711
712 # if there's a 0-9 section it will have been sorted to the beginning
713 # but we want it at the end
714 if ($tmparr[0] eq '0-9') {
715 shift @tmparr;
716 push (@tmparr, '0-9');
717 }
718 foreach my $subclass (@tmparr)
719 {
720 my $tempclassify
721 = (scalar(@tmparr)==1)
722 ? ($self->get_entry(" ", "VList", $metaname))
723 : ($self->get_entry($subclass, "VList", $metaname));
724
725
726 foreach my $subsubOID (@{$classhash->{$subclass}})
727 {
728 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
729 && defined $self->{'classifiers'}->{$1})
730 {
731 # this is a "bookshelf" node... >1 entry compacted
732 push (@{$tempclassify->{'contains'}},
733 $self->{'classifiers'}->{$1}->{'classifyinfo'});
734 # set the metadata field name, for mdoffset to work
735 $self->{'classifiers'}->{$1}->{'classifyinfo'}->{'mdtype'}=
736 $metaname;
737 }
738 else
739 {
740 $subsubOID =~ s/^Metavalue_(\d+)\.//;
741 # record the offset if this metadata type has multiple values
742 my $metaname_offset = $1 -1;
743 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
744 push (@{$tempclassify->{'contains'}}, $oid_rec);
745 }
746 }
747 push (@{$classifyinfo->{'contains'}}, $tempclassify);
748 }
749
750 return $classifyinfo;
751}
752
753sub compactlist {
754 my $self = shift (@_);
755 my ($classhashref) = @_;
756 my $compactedhash = {};
757 my @currentOIDs = ();
758 my $currentfirstletter = "";
759 my $currentlastletter = "";
760 my $lastkey = "";
761
762 # minimum and maximum documents to be displayed per page.
763 # the actual maximum will be max + (min-1).
764 # the smallest sub-section is a single letter at present
765 # so in this case there may be many times max documents
766 # displayed on a page.
767 my $min = $self->{'mincompact'};
768 my $max = $self->{'maxcompact'};
769
770 foreach my $subsection (sort keys %$classhashref) {
771 if ($subsection eq '0-9') {
772 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
773 next;
774 }
775 $currentfirstletter = $subsection if $currentfirstletter eq "";
776 if ((scalar (@currentOIDs) < $min) ||
777 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
778 push (@currentOIDs, @{$classhashref->{$subsection}});
779 $currentlastletter = $subsection;
780 } else {
781
782 if ($currentfirstletter eq $currentlastletter) {
783 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
784 $lastkey = $currentfirstletter;
785 } else {
786 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
787 $lastkey = "$currentfirstletter-$currentlastletter";
788 }
789 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
790 $compactedhash->{$subsection} = $classhashref->{$subsection};
791 @currentOIDs = ();
792 $currentfirstletter = "";
793 $lastkey=$subsection;
794 } else {
795 @currentOIDs = @{$classhashref->{$subsection}};
796 $currentfirstletter = $subsection;
797 $currentlastletter = $subsection;
798 }
799 }
800 }
801
802 # add final OIDs to last sub-classification if there aren't many otherwise
803 # add final sub-classification
804
805 # don't add if there aren't any oids
806 if (! scalar (@currentOIDs)) {return $compactedhash;}
807
808 if (scalar (@currentOIDs) < $min) {
809 my ($newkey) = $lastkey =~ /^(.)/;
810 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
811 delete $compactedhash->{$lastkey};
812 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
813 } else {
814 if ($currentfirstletter eq $currentlastletter) {
815 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
816 } else {
817 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
818 }
819 }
820
821 return $compactedhash;
822}
823
8241;
825
826
Note: See TracBrowser for help on using the repository browser.