source: gs2-extensions/parallel-building/trunk/src/perllib/classify/AZCompactSectionList.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 5.7 KB
Line 
1###########################################################################
2#
3# AZCompactSectionList.pm --
4#
5# Experimental AZCompactList with fixes to handle section-level metadata
6#
7###########################################################################
8
9package AZCompactSectionList;
10
11use AZCompactList;
12use strict;
13no strict 'refs'; # allow filehandles to be variables and viceversa
14
15sub BEGIN {
16 @AZCompactSectionList::ISA = ('AZCompactList');
17}
18
19my $arguments = [
20 ];
21my $options =
22{ 'name' => "AZCompactSectionList",
23 'desc' => "{AZCompactSectionList.desc}",
24 'abstract' => "no",
25 'inherits' => "yes",
26 'args' => $arguments };
27
28sub new {
29 my ($class) = shift (@_);
30 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
31 push(@$classifierslist, $class);
32
33 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
34 push(@{$hashArgOptLists->{"OptList"}},$options);
35
36 my $self = new AZCompactList($classifierslist, $inputargs, $hashArgOptLists);
37
38 return bless $self, $class;
39}
40
41#
42# override reinit() & reclassify() to demonstrate possible bug fixes
43# (search for SECTIONFIX? to see lines changed from AZCompactList.pm)
44#
45sub reinit
46{
47 my ($self,$classlist_ref) = @_;
48
49 my %mtfreq = ();
50 my @single_classlist = ();
51 my @multiple_classlist = ();
52
53 # find out how often each metavalue occurs
54 map
55 {
56 my $mv;
57 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
58 {
59 $mtfreq{$mv}++;
60 }
61 } @$classlist_ref;
62
63 # use this information to split the list: single metavalue/repeated value
64 map
65 {
66 my $i = 1;
67 my $metavalue;
68 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
69 {
70 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
71 {
72 push(@multiple_classlist,[$_,$i,$metavalue]);
73 }
74 else
75 {
76 push(@single_classlist,[$_,$metavalue]);
77 $metavalue =~ tr/[A-Z]/[a-z]/;
78 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
79 }
80 $i++;
81 }
82 } @$classlist_ref;
83
84
85 # Setup sub-classifiers for multiple list
86
87 $self->{'classifiers'} = {};
88
89 my $pm;
90 foreach $pm ("SimpleList", "SectionList")
91 {
92 my $listname
93 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
94 if (-e $listname) { require $listname; }
95 else
96 {
97 my $outhandle = $self->{'outhandle'};
98 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
99 die "\n";
100 }
101 }
102
103 # Create classifiers objects for each entry >= mingroup
104 my $metavalue;
105 foreach $metavalue (keys %mtfreq)
106 {
107 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
108 {
109 my $listclassobj;
110 my $doclevel = $self->{'doclevel'};
111 my $metaname = $self->{'metaname'};
112 my @metaname_list = split('/',$metaname);
113 $metaname = shift(@metaname_list);
114 if (@metaname_list==0)
115 {
116 my @args;
117 push @args, ("-metadata", "$metaname");
118# buttonname is also used for the node's title
119 push @args, ("-buttonname", "$metavalue");
120 push @args, ("-sort", $self->{'sort'});
121
122 my $ptArgs = \@args;
123 if ($doclevel =~ m/^top(level)?/i)
124 {
125 eval ("\$listclassobj = new SimpleList([],\$ptArgs)"); warn $@ if $@;
126 }
127 else
128 {
129 # SECTIONFIX?
130 #eval ("\$listclassobj = new SectionList($args)");
131 eval ("\$listclassobj = new SectionList([],\$ptArgs)");
132 }
133 }
134 else
135 {
136 $metaname = join('/',@metaname_list);
137
138 my @args;
139 push @args, ("-metadata", "$metaname");
140# buttonname is also used for the node's title
141 push @args, ("-buttonname", "$metavalue");
142 push @args, ("-doclevel", "$doclevel");
143 push @args, "-recopt";
144
145 # SECTIONFIX?
146 #eval ("\$listclassobj = new AZCompactList($args)");
147 my $ptArgs = \@args;
148 eval ("\$listclassobj = new AZCompactList([],\$ptArgs)");
149 }
150 if ($@) {
151 my $outhandle = $self->{'outhandle'};
152 print $outhandle "$@";
153 die "\n";
154 }
155
156 $listclassobj->init();
157
158 if (defined $metavalue && $metavalue =~ /\w/)
159 {
160 my $formatted_node = $metavalue;
161 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
162 {
163 &sorttools::format_string_name_en(\$formatted_node);
164 }
165 else
166 {
167 &sorttools::format_string_en(\$formatted_node);
168 }
169
170 $self->{'classifiers'}->{$metavalue}
171 = { 'classifyobj' => $listclassobj,
172 'formattednode' => $formatted_node };
173 }
174 }
175 }
176
177
178 return (\@single_classlist,\@multiple_classlist);
179}
180
181
182sub reclassify
183{
184 my ($self,$multiple_cl_ref) = @_;
185
186 # Entries in the current classify list that are "book nodes"
187 # should be recursively classified.
188 #--
189 foreach my $dm_pair (@$multiple_cl_ref)
190 {
191 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
192 my $listclassobj;
193
194 # find metavalue in list of sub-classifiers
195 my $found = 0;
196 my $node_name;
197 foreach $node_name (keys %{$self->{'classifiers'}})
198 {
199 my $resafe_node_name = $node_name;
200 $resafe_node_name =~ s/(\(|\)|\[|\]|\{|\}|\^|\$|\.|\+|\*|\?|\|)/\\$1/g;
201 if ($metavalue =~ m/^$resafe_node_name$/i)
202 {
203 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
204
205 # SECTIONFIX? section must include multiple levels, e.g. '1.12'
206 #if ($doc_OID =~ m/^.*\.(\d+)$/)
207 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
208 {
209 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
210 # SECTIONFIX? classify can't handle multi-level section
211 #->classify($doc_obj, "Section=$1");
212 ->classify_section($1, $doc_obj, $sortmeta);
213 }
214 else
215 {
216 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
217 ->classify($doc_obj);
218 }
219
220 $found = 1;
221 last;
222 }
223 }
224
225 if (!$found)
226 {
227 my $outhandle = $self->{'outhandle'};
228 print $outhandle "Warning: AZCompactList::reclassify ";
229 print $outhandle "could not find sub-node for $metavalue with doc_OID $doc_OID\n";
230 }
231 }
232}
233
2341;
Note: See TracBrowser for help on using the repository browser.