source: main/trunk/greenstone2/perllib/classify/AZCompactList.pm@ 32283

Last change on this file since 32283 was 29476, checked in by sjs49, 9 years ago

First of 2 commits to get diffcol on the 64 bit Ubuntu that has perl 5.18 to work again for the Multimedia and MARC-Singlefile collections. This commit does a sort keys once in both AZCompactList and MARCPlugin. Without the sort keys change to MARCPlugin, various entries of dc.Meta fields are stored in different orders each time. Without the sort keys change to AZCompactList, nodes with identical titles/names that differ only in capitalisation are sorted in a different order each time.

  • Property svn:keywords set to Author Date Id Revision
File size: 23.9 KB
RevLine 
[1086]1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25# classifier plugin for sorting alphabetically
[6975]26
[1086]27package AZCompactList;
28
[24691]29use strict;
30no strict 'refs'; # allow filehandles to be variables and viceversa
31
[17209]32use BaseClassifier;
[1086]33use sorttools;
[28564]34use FileUtils;
[1086]35
[24691]36use Unicode::Normalize;
[10253]37
[1483]38sub BEGIN {
[17209]39 @AZCompactList::ISA = ('BaseClassifier');
[1483]40}
41
[28198]42our $doclevel_list =
[4759]43 [ { 'name' => "top",
[4873]44 'desc' => "{AZCompactList.doclevel.top}" },
[24691]45 { 'name' => "firstlevel",
46 'desc' => "{AZCompactList.doclevel.firstlevel}" },
[4759]47 { 'name' => "section",
[4873]48 'desc' => "{AZCompactList.doclevel.section}" } ];
[3540]49
50my $arguments =
[4759]51 [ { 'name' => "metadata",
[6975]52 'desc' => "{AZCompactList.metadata}",
[3540]53 'type' => "metadata",
[4759]54 'reqd' => "yes" },
[7548]55 { 'name' => "firstvalueonly",
56 'desc' => "{AZCompactList.firstvalueonly}",
57 'type' => "flag",
58 'reqd' => "no" },
59 { 'name' => "allvalues",
60 'desc' => "{AZCompactList.allvalues}",
61 'type' => "flag",
62 'reqd' => "no" },
[6281]63 { 'name' => "sort",
64 'desc' => "{AZCompactList.sort}",
[11665]65 'type' => "metadata",
[10218]66# 'deft' => "Title",
[6281]67 'reqd' => "no" },
[4759]68 { 'name' => "removeprefix",
[9064]69 'desc' => "{BasClas.removeprefix}",
[6408]70 'type' => "regexp",
[4759]71 'deft' => "",
72 'reqd' => "no" },
[4840]73 { 'name' => "removesuffix",
[9064]74 'desc' => "{BasClas.removesuffix}",
[6408]75 'type' => "regexp",
[4840]76 'deft' => "",
77 'reqd' => "no" },
[4759]78 { 'name' => "mingroup",
[4873]79 'desc' => "{AZCompactList.mingroup}",
[3540]80 'type' => "int",
[11407]81 'deft' => "1",
[7577]82 'range' => "1,",
[4759]83 'reqd' => "no" },
84 { 'name' => "minnesting",
[4873]85 'desc' => "{AZCompactList.minnesting}",
[3540]86 'type' => "int",
[4759]87 'deft' => "20",
[7577]88 'range' => "2,",
[4759]89 'reqd' => "no" },
90 { 'name' => "mincompact",
[4873]91 'desc' => "{AZCompactList.mincompact}",
[3540]92 'type' => "int",
[4759]93 'deft' => "10",
[7577]94 'range' => "1,",
[4759]95 'reqd' => "no" },
96 { 'name' => "maxcompact",
[4873]97 'desc' => "{AZCompactList.maxcompact}",
[3540]98 'type' => "int",
[4759]99 'deft' => "30",
[7577]100 'range' => "1,",
[4759]101 'reqd' => "no" },
102 { 'name' => "doclevel",
[4873]103 'desc' => "{AZCompactList.doclevel}",
[3540]104 'type' => "enum",
105 'list' => $doclevel_list,
[4759]106 'deft' => "top",
107 'reqd' => "no" },
108 { 'name' => "freqsort",
[4873]109 'desc' => "{AZCompactList.freqsort}",
[5615]110 'type' => "flag"},
111 { 'name' => "recopt",
112 'desc' => "{AZCompactList.recopt}",
113 'type' => "flag",
114 'reqd' => "no" } ];
[3540]115
116my $options =
117{ 'name' => "AZCompactList",
[5729]118 'desc' => "{AZCompactList.desc}",
[6408]119 'abstract' => "no",
120 'inherits' => "yes",
[3540]121 'args' => $arguments };
122
[1839]123
[1086]124sub new {
[10218]125 my ($class) = shift (@_);
126 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
127 push(@$pluginlist, $class);
[1086]128
[17209]129 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
130 push(@{$hashArgOptLists->{"OptList"}},$options);
[3540]131
[17209]132 my $self = new BaseClassifier($pluginlist, $inputargs, $hashArgOptLists);
[3540]133
[10253]134 if ($self->{'info_only'}) {
135 # don't worry about any options etc
136 return bless $self, $class;
137 }
138
[10218]139 if (!$self->{"metadata"}) {
[1483]140 my $outhandle = $self->{'outhandle'};
[6985]141 print $outhandle "AZCompactList Error: required option -metadata not supplied\n";
142 $self->print_txt_usage("");
143 die "AZCompactList Error: required option -metadata not supplied\n";
[1483]144 }
145
[20429]146 $self->{'metadata'} = $self->strip_ex_from_metadata($self->{'metadata'});
[10218]147 # Manually set $self parameters.
[1483]148 $self->{'list'} = {};
149 $self->{'listmetavalue'} = {};
[7406]150 $self->{'list_mvpair'} = {};
[1483]151 $self->{'reclassify'} = {};
152 $self->{'reclassifylist'} = {};
[9461]153
[10218]154 $self->{'buttonname'} = $self->generate_title_from_metadata($self->{'metadata'}) unless ($self->{'buttonname'});
155
156 if (defined($self->{"removeprefix"}) && $self->{"removeprefix"}) {
157 $self->{"removeprefix"} =~ s/^\^//; # don't need a leading ^
[2955]158 }
[10218]159 if (defined($self->{"removesuffix"}) && $self->{"removesuffix"}) {
160 $self->{"removesuffix"} =~ s/\$$//; # don't need a trailing $
[4840]161 }
[10218]162
163 $self->{'recopt'} = ($self->{'recopt'} == 0) ? undef : "on";
[20429]164
165 if (defined $self->{'sort'}) {
166 $self->{'sort'} = $self->strip_ex_from_metadata($self->{'sort'});
167 }
[10218]168 # Clean out the unused keys
169 if($self->{"removeprefix"} eq "") {delete $self->{"removeprefix"};}
170 if($self->{"removesuffix"} eq "") {delete $self->{"removesuffix"};}
[3303]171
[1483]172 return bless $self, $class;
[1086]173}
174
175sub init
176{
177 my $self = shift (@_);
178
179 $self->{'list'} = {};
180 $self->{'listmetavalue'} = {};
[7406]181 $self->{'list_mvpair'} = {};
[1086]182 $self->{'reclassify'} = {};
183 $self->{'reclassifylist'} = {};
184}
185
[10253]186my $tmp = 0;
[1086]187
[7835]188sub classify
[1086]189{
190 my $self = shift (@_);
[23116]191 my ($doc_obj) = @_;
[1086]192
193 my $doc_OID = $doc_obj->get_OID();
[19495]194 my $outhandle = $self->{'outhandle'};
[1086]195
[1313]196 my @sectionlist = ();
197 my $topsection = $doc_obj->get_top_section();
[10218]198 my $metaname = $self->{'metadata'};
[1086]199
[9461]200 $metaname =~ s/(\/|\|).*//; # grab first name in n1/n2/n3 or n1|n2|n3 list
[20008]201 my @commameta_list = split(/,|;/, $metaname);
[1086]202
[1313]203 if ($self->{'doclevel'} =~ /^top(level)?/i)
[1086]204 {
[1313]205 push(@sectionlist,$topsection);
206 }
[24691]207 elsif ($self->{'doclevel'} =~ /^first(level)?/i)
[1313]208 {
[24691]209 my $toplevel_children = $doc_obj->get_children($topsection);
210 push(@sectionlist,@$toplevel_children);
211 }
212 else # (all)?section(s)?
213 {
[1313]214 my $thissection = $doc_obj->get_next_section($topsection);
215 while (defined $thissection)
216 {
217 push(@sectionlist,$thissection);
218 $thissection = $doc_obj->get_next_section ($thissection);
219 }
220 }
[1086]221
[1313]222 my $thissection;
223 foreach $thissection (@sectionlist)
224 {
225 my $full_doc_OID
226 = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
[18455]227
[7406]228 if (defined $self->{'list_mvpair'}->{$full_doc_OID})
[1086]229 {
[1483]230 print $outhandle "WARNING: AZCompactList::classify called multiple times for $full_doc_OID\n";
[1086]231 }
[1313]232 $self->{'list'}->{$full_doc_OID} = [];
233 $self->{'listmetavalue'}->{$full_doc_OID} = [];
[7406]234 $self->{'list_mvpair'}->{$full_doc_OID} = [];
[1313]235
[7406]236 my $metavalues = [];
237 foreach my $cmn (@commameta_list) {
238 my $cmvalues = $doc_obj->get_metadata($thissection,$cmn);
[7557]239 push(@$metavalues,@$cmvalues) if (@{$cmvalues});
240 last if (@{$cmvalues} && !$self->{'allvalues'});
[7406]241 }
242
[1313]243 my $metavalue;
244 foreach $metavalue (@$metavalues)
[1086]245 {
[7042]246 # Tidy up use of white space in metavalue for classifying
247 $metavalue =~ s/^\s*//s;
248 $metavalue =~ s/\s*$//s;
249 $metavalue =~ s/\n/ /s;
250 $metavalue =~ s/\s{2,}/ /s;
251
[1313]252 # if this document doesn't contain the metadata element we're
253 # sorting by we won't include it in this classification
254 if (defined $metavalue && $metavalue =~ /\w/)
255 {
[4840]256 if (defined($self->{'removeprefix'}) &&
257 length($self->{'removeprefix'})) {
[2955]258 $metavalue =~ s/^$self->{'removeprefix'}//;
[4225]259
260 # check that it's not now empty
261 if (!$metavalue) {next;}
[2955]262 }
263
[4840]264 if (defined($self->{'removesuffix'}) &&
265 length($self->{'removesuffix'})) {
266 $metavalue =~ s/$self->{'removesuffix'}$//;
267
268 # check that it's not now empty
269 if (!$metavalue) {next;}
270 }
271
[10630]272 my $formatted_metavalue;
273 if ($self->{'no_metadata_formatting'}) {
274 $formatted_metavalue = $metavalue;
275 } else {
276 $formatted_metavalue = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $metavalue, $doc_obj);
277 }
[1313]278
[2955]279 #### prefix-str
280 if (! defined($formatted_metavalue)) {
281 print $outhandle "Warning: AZCompactList: metavalue is ";
282 print $outhandle "empty\n";
283 $formatted_metavalue="";
284 }
[7406]285
286 my $mv_pair = { 'mv' => $metavalue, 'fmv' => $formatted_metavalue };
[1313]287 push(@{$self->{'list'}->{$full_doc_OID}},$formatted_metavalue);
288 push(@{$self->{'listmetavalue'}->{$full_doc_OID}} ,$metavalue);
[7406]289 push(@{$self->{'list_mvpair'}->{$full_doc_OID}},$mv_pair);
[1313]290
[7406]291
[7548]292 last if ($self->{'firstvalueonly'});
[1313]293 }
[1086]294 }
[9519]295
296 # This is used in reclassify below for AZCompactSectionList
[10218]297 my $sortmeta = $doc_obj->get_metadata_element($thissection, $self->{'sort'});
[9519]298 $self->{'reclassify'}->{$full_doc_OID} = [$doc_obj,$sortmeta];
[1086]299 }
300}
301
302sub reinit
303{
304 my ($self,$classlist_ref) = @_;
[2955]305 my $outhandle = $self->{'outhandle'};
[1086]306
307 my %mtfreq = ();
308 my @single_classlist = ();
309 my @multiple_classlist = ();
310
311 # find out how often each metavalue occurs
[1313]312 map
313 {
[7406]314 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}} )
[1313]315 {
[7406]316### print STDERR "*** plain mv = $mvp->{'mv'}\n";
317### print STDERR "*** format mv = $mvp->{'fmv'}\n";
318
319 my $metavalue = $mvp->{'mv'};
[7702]320 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
321 $mtfreq{$metavalue}++;
[1313]322 }
323 } @$classlist_ref;
[1086]324
325 # use this information to split the list: single metavalue/repeated value
326 map
327 {
[1313]328 my $i = 1;
329 my $metavalue;
[7406]330 foreach my $mvp (@{$self->{'list_mvpair'}->{$_}})
[1086]331 {
[7406]332 my $metavalue = $mvp->{'mv'};
[7702]333 $metavalue =~ s!^-!\\-!; # in case it starts with "-"
[29476]334 my $cs_metavalue = $mvp->{'mv'}; # case sensitive
[1313]335 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
336 {
[7406]337### print STDERR "*** pushing on $cs_metavalue\n";
[1313]338 push(@multiple_classlist,[$_,$i,$metavalue]);
339 }
340 else
341 {
[7406]342 push(@single_classlist,[$_,$cs_metavalue]);
[1313]343 $metavalue =~ tr/[A-Z]/[a-z]/;
344 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
345 }
346 $i++;
[1086]347 }
348 } @$classlist_ref;
349
350
351 # Setup sub-classifiers for multiple list
352
353 $self->{'classifiers'} = {};
354
[1313]355 my $pm;
[18566]356 foreach $pm ("SimpleList", "SectionList")
[1313]357 {
358 my $listname
[28564]359 = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
[1313]360 if (-e $listname) { require $listname; }
361 else
362 {
[1483]363 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
364 die "\n";
[1313]365 }
[1086]366 }
367
[1313]368 # Create classifiers objects for each entry >= mingroup
[1086]369 my $metavalue;
[8728]370 my $doclevel = $self->{'doclevel'};
371 my $mingroup = $self->{'mingroup'};
[10218]372 my @metaname_list = split(/\/|\|/,$self->{'metadata'});
[8728]373 my $metaname = shift(@metaname_list);
374 my $hierarchical = 0;
[9461]375 if (scalar(@metaname_list) > 1) {
[8728]376 $hierarchical = 1;
377 $metaname = join('/',@metaname_list);
378 }
[29476]379 foreach $metavalue (sort keys %mtfreq)
[1086]380 {
[8728]381 if ($mtfreq{$metavalue} >= $mingroup)
[1086]382 {
[7835]383 # occurs more often than minimum required to compact into a group
[1086]384 my $listclassobj;
[8728]385
386 if (!$hierarchical)
387 {
[1948]388 my @args;
389 push @args, ("-metadata", "$metaname");
[5615]390 # buttonname is also used for the node's title
[2080]391 push @args, ("-buttonname", "$metavalue");
[10218]392 push @args, ("-sort", $self->{'sort'});
[1948]393
[10218]394 my $ptArgs = \@args;
[1313]395 if ($doclevel =~ m/^top(level)?/i)
396 {
[18566]397 eval ("\$listclassobj = new SimpleList([],\$ptArgs)");
[1313]398 }
399 else
400 {
[24691]401 # first(level)? or (all)?section(s)?
[10218]402 eval ("\$listclassobj = new SectionList([],\$ptArgs)");
[1313]403 }
404 }
405 else
406 {
[1948]407
408 my @args;
409 push @args, ("-metadata", "$metaname");
[5615]410 # buttonname is also used for the node's title
[2080]411 push @args, ("-buttonname", "$metavalue");
[10218]412 push @args, ("-sort", $self->{'sort'});
[10113]413
414 if (defined $self->{'removeprefix'}) {
415 push @args, ("-removeprefix", $self->{'removeprefix'});
416 }
417 if (defined $self->{'removesuffix'}) {
418 push @args, ("-removesuffix", $self->{'removesuffix'});
419 }
420
[1948]421 push @args, ("-doclevel", "$doclevel");
[5615]422 push @args, ("-mingroup", $mingroup);
423 push @args, "-recopt ";
[1313]424
[10218]425 my $ptArgs = \@args;
426 eval ("\$listclassobj = new AZCompactList([],\$ptArgs)");
[1313]427 }
[10218]428
[1483]429 if ($@) {
430 print $outhandle "$@";
431 die "\n";
432 }
[1086]433
434 $listclassobj->init();
435
436 if (defined $metavalue && $metavalue =~ /\w/)
437 {
438 my $formatted_node = $metavalue;
[2955]439
[4840]440 if (defined($self->{'removeprefix'}) &&
441 length($self->{'removeprefix'})) {
[2955]442 $formatted_node =~ s/^$self->{'removeprefix'}//;
[4225]443 # check that it's not now empty
444 if (!$formatted_node) {next;}
[2955]445 }
[4840]446 if (defined($self->{'removesuffix'}) &&
447 length($self->{'removesuffix'})) {
448 $formatted_node =~ s/$self->{'removesuffix'}$//;
449 # check that it's not now empty
450 if (!$formatted_node) {next;}
451 }
[7406]452
[10630]453 $formatted_node = &sorttools::format_metadata_for_sorting($self->{'metadata'}, $formatted_node) unless $self->{'no_metadata_formatting'};
[2955]454
455 # In case our formatted string is empty...
456 if (! defined($formatted_node)) {
457 print $outhandle "Warning: AZCompactList: metavalue is ";
458 print $outhandle "empty\n";
459 $formatted_node="";
460 }
461
[6482]462 # use the lower case, for speed of lookup.
463 my $meta_lc=lc($metavalue);
464 $self->{'classifiers'}->{$meta_lc}
[1086]465 = { 'classifyobj' => $listclassobj,
466 'formattednode' => $formatted_node };
467 }
468 }
469 }
470
471
472 return (\@single_classlist,\@multiple_classlist);
473}
474
475
476sub reclassify
477{
478 my ($self,$multiple_cl_ref) = @_;
479
[1313]480 # Entries in the current classify list that are "book nodes"
481 # should be recursively classified.
482 #--
[10253]483 foreach my $dm_pair (@$multiple_cl_ref)
[1086]484 {
[7406]485 my ($doc_OID,$mdoffset,$metavalue,$cs_metavalue) = @$dm_pair;
[1086]486 my $listclassobj;
[1313]487
[1086]488 # find metavalue in list of sub-classifiers
[6482]489 # check if we have a key (lower case) for this metadata value
490 my $node_name=lc($metavalue);
491 if (exists $self->{'classifiers'}->{$node_name})
[1086]492 {
[9519]493 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
[1086]494
[7835]495 # record the metadata value offset temporarily, so eg AZList can
496 # get the correct metadata value (for multi-valued metadata fields)
497 $doc_obj->{'mdoffset'}=$mdoffset;
498
[6482]499 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
500 {
501 my $section=$1;
[24691]502 if ($self->{'doclevel'} =~ m/^top(level)?/i) { # toplevel
[1313]503 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
[23354]504 ->classify($doc_obj,"Section=$section");
[24691]505 } else {
506 # first(level)? or (all)?section(s)?
507
508 # classify() can't handle multi-level section, so use
509 # classify_section()
510 # ... thanks to Don Gourley for this...
511
[6482]512 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
[23354]513 ->classify_section($section, $doc_obj, $sortmeta);
[1313]514 }
[1086]515 }
[6482]516 else
517 {
518 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
[23354]519 ->classify($doc_obj);
[6482]520 }
521 } else { # this key is not in the hash
[2889]522 my $outhandle=$self->{outhandle};
[1483]523 print $outhandle "Warning: AZCompactList::reclassify ";
[2955]524 print $outhandle "could not find sub-node for metadata=`$metavalue' with doc_OID $doc_OID\n";
[1086]525 }
526 }
527}
528
529
530
531sub get_reclassify_info
532{
533 my $self = shift (@_);
534
535 my $node_name;
536 foreach $node_name (keys %{$self->{'classifiers'}})
537 {
538 my $classifyinfo
539 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
[1313]540 ->get_classify_info();
[1086]541 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
542 = $classifyinfo;
543 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
544 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
545 }
546}
547
548
[1313]549sub alpha_numeric_cmp
550{
551 my ($self,$a,$b) = @_;
552
553 my $title_a = $self->{'reclassifylist'}->{$a};
554 my $title_b = $self->{'reclassifylist'}->{$b};
[7175]555
[1313]556 if ($title_a =~ m/^(\d+(\.\d+)?)/)
557 {
558 my $val_a = $1;
559 if ($title_b =~ m/^(\d+(\.\d+)?)/)
560 {
561 my $val_b = $1;
562 if ($val_a != $val_b)
563 {
564 return ($val_a <=> $val_b);
565 }
566 }
567 }
568
569 return ($title_a cmp $title_b);
570}
571
[3303]572sub frequency_cmp
573{
574 my ($self,$a,$b) = @_;
575
576
577 my $title_a = $self->{'reclassifylist'}->{$a};
578 my $title_b = $self->{'reclassifylist'}->{$b};
579
580 my $a_freq = 1;
581 my $b_freq = 1;
582
583 if ($a =~ m/^CLASSIFY\.(.*)$/)
584 {
585 my $a_node = $1;
586 my $a_nodeinfo = $self->{'classifiers'}->{$a_node}->{'classifyinfo'};
587 $a_freq = scalar(@{$a_nodeinfo->{'contains'}});
588 }
589
590 if ($b =~ m/^CLASSIFY\.(.*)$/)
591 {
592 my $b_node = $1;
593 my $b_nodeinfo = $self->{'classifiers'}->{$b_node}->{'classifyinfo'};
594 $b_freq = scalar(@{$b_nodeinfo->{'contains'}});
595 }
596
597 return $b_freq <=> $a_freq;
598}
599
[1086]600sub get_classify_info {
601 my $self = shift (@_);
602
[7406]603 my @classlist =keys %{$self->{'list_mvpair'}}; # list all doc oids
[1086]604
605 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
606 $self->reclassify($multiple_cl_ref);
607 $self->get_reclassify_info();
608
[3303]609 my @reclassified_classlist;
610 if ($self->{'freqsort'})
611 {
612 @reclassified_classlist
613 = sort { $self->frequency_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
614 # supress sub-grouping by alphabet
615 map { $self->{'reclassifylist'}->{$_} = "A".$self->{'reclassifylist'}; } keys %{$self->{'reclassifylist'}};
616 }
617 else
618 {
619# @reclassified_classlist
620# = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
[1086]621
[3529]622 # alpha_numeric_cmp is slower than "cmp" but handles numbers better ...
[1313]623
[3303]624 @reclassified_classlist
[3529]625 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'reclassifylist'}};
[3303]626
627 }
628
[1086]629 return $self->splitlist (\@reclassified_classlist);
630}
631
[1313]632sub get_entry {
633 my $self = shift (@_);
634 my ($title, $childtype, $metaname, $thistype) = @_;
635 # organise into classification structure
636 my %classifyinfo = ('childtype'=>$childtype,
637 'Title'=>$title,
638 'contains'=>[],
639 'mdtype'=>$metaname);
640
641 $classifyinfo{'thistype'} = $thistype
642 if defined $thistype && $thistype =~ /\w/;
643
644 return \%classifyinfo;
645}
646
647
648
649# splitlist takes an ordered list of classifications (@$classlistref) and
650# splits it up into alphabetical sub-sections.
[1086]651sub splitlist {
652 my $self = shift (@_);
653 my ($classlistref) = @_;
654 my $classhash = {};
655
656 # top level
[10218]657 my @metanames = split(/\/|\|/,$self->{'metadata'});
[1313]658 my $metaname = shift(@metanames);
659
[1086]660 my $childtype = "HList";
[1313]661 $childtype = "VList" if (scalar (@$classlistref) <= $self->{'minnesting'});
[1086]662
[10218]663 my $title = $self->{'buttonname'}; # should always be defined by now.
[1313]664 my $classifyinfo;
665 if (!defined($self->{'recopt'}))
666 {
667 $classifyinfo
[1716]668 = $self->get_entry ($title, $childtype, $metaname, "Invisible");
[1313]669 }
670 else
671 {
672 $classifyinfo
673 = $self->get_entry ($title, $childtype, $metaname, "VList");
674 }
675
676 # don't need to do any splitting if there are less than 'minnesting' classifications
677 if ((scalar @$classlistref) <= $self->{'minnesting'}) {
[10253]678 foreach my $subOID (@$classlistref) {
[1086]679 if ($subOID =~ /^CLASSIFY\.(.*)$/
680 && defined $self->{'classifiers'}->{$1})
681 {
682 push (@{$classifyinfo->{'contains'}},
683 $self->{'classifiers'}->{$1}->{'classifyinfo'});
684 }
685 else
686 {
[1313]687 $subOID =~ s/^Metavalue_(\d+)\.//;
688 my $metaname_offset = $1 -1;
689 my $oid_rec = {'OID'=>$subOID, 'offset'=>$metaname_offset};
690 push (@{$classifyinfo->{'contains'}}, $oid_rec);
[1086]691 }
692 }
693 return $classifyinfo;
694 }
695
696 # first split up the list into separate A-Z and 0-9 classifications
[10253]697 foreach my $classification (@$classlistref) {
[1086]698 my $title = $self->{'reclassifylist'}->{$classification};
699 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
[24691]700 $title =~ s/^(\W|_)+//g; # remove leading non-word chars
[3109]701
[7406]702 # only want first character for classification
[7835]703 $title =~ m/^(.)/; # always a char, or first byte of wide char?
[7704]704 if (defined($1) && $1 ne "") {
[6482]705 $title=$1;
[24691]706
707 # remove any accents on initial character by mapping to Unicode's
708 # normalized decomposed form (accents follow initial letter)
709 # and then pick off the initial letter
710 my $title_decomposed = NFD($title);
711 $title = substr($title_decomposed,0,1);
[6482]712 } else {
713 print STDERR "no first character found for \"$title\" - \"" .
714 $self->{'reclassifylist'}->{$classification} . "\"\n";
715 }
[1086]716 $title =~ tr/[a-z]/[A-Z]/;
717
718 if ($title =~ /^[0-9]$/) {$title = '0-9';}
719 elsif ($title !~ /^[A-Z]$/) {
[1483]720 my $outhandle = $self->{'outhandle'};
721 print $outhandle "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
[1086]722 }
723 $classhash->{$title} = [] unless defined $classhash->{$title};
724 push (@{$classhash->{$title}}, $classification);
725 }
726 $classhash = $self->compactlist ($classhash);
727
728 my @tmparr = ();
[10253]729 foreach my $subsection (sort keys (%$classhash)) {
[1086]730 push (@tmparr, $subsection);
731 }
732
733 # if there's a 0-9 section it will have been sorted to the beginning
734 # but we want it at the end
735 if ($tmparr[0] eq '0-9') {
736 shift @tmparr;
737 push (@tmparr, '0-9');
738 }
[10253]739 foreach my $subclass (@tmparr)
[1086]740 {
[3302]741 my $tempclassify
742 = (scalar(@tmparr)==1)
743 ? ($self->get_entry(" ", "VList", $metaname))
744 : ($self->get_entry($subclass, "VList", $metaname));
745
746
[10253]747 foreach my $subsubOID (@{$classhash->{$subclass}})
[1086]748 {
749 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
750 && defined $self->{'classifiers'}->{$1})
751 {
[7835]752 # this is a "bookshelf" node... >1 entry compacted
[1086]753 push (@{$tempclassify->{'contains'}},
754 $self->{'classifiers'}->{$1}->{'classifyinfo'});
[7835]755 # set the metadata field name, for mdoffset to work
756 $self->{'classifiers'}->{$1}->{'classifyinfo'}->{'mdtype'}=
757 $metaname;
[1086]758 }
759 else
760 {
[1313]761 $subsubOID =~ s/^Metavalue_(\d+)\.//;
[7835]762 # record the offset if this metadata type has multiple values
[1313]763 my $metaname_offset = $1 -1;
764 my $oid_rec = {'OID'=>$subsubOID, 'offset'=>$metaname_offset};
765 push (@{$tempclassify->{'contains'}}, $oid_rec);
[1086]766 }
767 }
768 push (@{$classifyinfo->{'contains'}}, $tempclassify);
769 }
770
771 return $classifyinfo;
772}
773
[1313]774sub compactlist {
775 my $self = shift (@_);
776 my ($classhashref) = @_;
777 my $compactedhash = {};
778 my @currentOIDs = ();
779 my $currentfirstletter = "";
780 my $currentlastletter = "";
781 my $lastkey = "";
782
783 # minimum and maximum documents to be displayed per page.
784 # the actual maximum will be max + (min-1).
785 # the smallest sub-section is a single letter at present
786 # so in this case there may be many times max documents
787 # displayed on a page.
788 my $min = $self->{'mincompact'};
789 my $max = $self->{'maxcompact'};
790
[10253]791 foreach my $subsection (sort keys %$classhashref) {
[1313]792 if ($subsection eq '0-9') {
793 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
794 next;
795 }
796 $currentfirstletter = $subsection if $currentfirstletter eq "";
797 if ((scalar (@currentOIDs) < $min) ||
798 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
799 push (@currentOIDs, @{$classhashref->{$subsection}});
800 $currentlastletter = $subsection;
801 } else {
802
803 if ($currentfirstletter eq $currentlastletter) {
804 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
805 $lastkey = $currentfirstletter;
806 } else {
807 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
808 $lastkey = "$currentfirstletter-$currentlastletter";
809 }
810 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
811 $compactedhash->{$subsection} = $classhashref->{$subsection};
812 @currentOIDs = ();
813 $currentfirstletter = "";
[6084]814 $lastkey=$subsection;
[1313]815 } else {
816 @currentOIDs = @{$classhashref->{$subsection}};
817 $currentfirstletter = $subsection;
818 $currentlastletter = $subsection;
819 }
820 }
821 }
822
823 # add final OIDs to last sub-classification if there aren't many otherwise
824 # add final sub-classification
[5790]825
826 # don't add if there aren't any oids
827 if (! scalar (@currentOIDs)) {return $compactedhash;}
828
[1313]829 if (scalar (@currentOIDs) < $min) {
830 my ($newkey) = $lastkey =~ /^(.)/;
831 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
832 delete $compactedhash->{$lastkey};
833 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
834 } else {
835 if ($currentfirstletter eq $currentlastletter) {
836 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
837 } else {
838 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
839 }
840 }
841
842 return $compactedhash;
843}
844
[1086]8451;
[1313]846
847
Note: See TracBrowser for help on using the repository browser.