source: main/trunk/greenstone2/perllib/classify/AZCompactSectionList.pm@ 28250

Last change on this file since 28250 was 28199, checked in by kjdon, 11 years ago

make the doclevel default be section - a sectionlist by default should classify sections. and don't have metaname field, have metadata field.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.8 KB
Line 
1###########################################################################
2#
3# AZCompactSectionList.pm --
4#
5# Experimental AZCompactList with fixes to handle section-level metadata
6#
7###########################################################################
8
9package AZCompactSectionList;
10
11use AZCompactList;
12use strict;
13no strict 'refs'; # allow filehandles to be variables and viceversa
14
15sub BEGIN {
16 @AZCompactSectionList::ISA = ('AZCompactList');
17}
18
19my $arguments = [
20 { 'name' => "doclevel",
21 'desc' => "{AZCompactList.doclevel}",
22 'type' => "enum",
23 'list' => $AZCompactList::doclevel_list,
24 'deft' => "section",
25 'reqd' => "no" },
26 ];
27my $options =
28{ 'name' => "AZCompactSectionList",
29 'desc' => "{AZCompactSectionList.desc}",
30 'abstract' => "no",
31 'inherits' => "yes",
32 'args' => $arguments };
33
34sub new {
35 my ($class) = shift (@_);
36 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
37 push(@$classifierslist, $class);
38
39 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
40 push(@{$hashArgOptLists->{"OptList"}},$options);
41
42 my $self = new AZCompactList($classifierslist, $inputargs, $hashArgOptLists);
43
44 return bless $self, $class;
45}
46
47#
48# override reinit() & reclassify() to demonstrate possible bug fixes
49# (search for SECTIONFIX? to see lines changed from AZCompactList.pm)
50#
51sub reinit
52{
53 my ($self,$classlist_ref) = @_;
54
55 my %mtfreq = ();
56 my @single_classlist = ();
57 my @multiple_classlist = ();
58
59 # find out how often each metavalue occurs
60 map
61 {
62 my $mv;
63 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
64 {
65 $mtfreq{$mv}++;
66 }
67 } @$classlist_ref;
68
69 # use this information to split the list: single metavalue/repeated value
70 map
71 {
72 my $i = 1;
73 my $metavalue;
74 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
75 {
76 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
77 {
78 push(@multiple_classlist,[$_,$i,$metavalue]);
79 }
80 else
81 {
82 push(@single_classlist,[$_,$metavalue]);
83 $metavalue =~ tr/[A-Z]/[a-z]/;
84 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
85 }
86 $i++;
87 }
88 } @$classlist_ref;
89
90
91 # Setup sub-classifiers for multiple list
92
93 $self->{'classifiers'} = {};
94
95 my $pm;
96 foreach $pm ("SimpleList", "SectionList")
97 {
98 my $listname
99 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
100 if (-e $listname) { require $listname; }
101 else
102 {
103 my $outhandle = $self->{'outhandle'};
104 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
105 die "\n";
106 }
107 }
108
109 # Create classifiers objects for each entry >= mingroup
110 my $metavalue;
111 foreach $metavalue (keys %mtfreq)
112 {
113 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
114 {
115 my $listclassobj;
116 my $doclevel = $self->{'doclevel'};
117 my $metaname = $self->{'metadata'};
118 my @metaname_list = split('/',$metaname);
119 $metaname = shift(@metaname_list);
120 if (@metaname_list==0)
121 {
122 my @args;
123 push @args, ("-metadata", "$metaname");
124# buttonname is also used for the node's title
125 push @args, ("-buttonname", "$metavalue");
126 push @args, ("-sort", $self->{'sort'});
127
128 my $ptArgs = \@args;
129 if ($doclevel =~ m/^top(level)?/i)
130 {
131 eval ("\$listclassobj = new SimpleList([],\$ptArgs)"); warn $@ if $@;
132 }
133 else
134 {
135 # SECTIONFIX?
136 #eval ("\$listclassobj = new SectionList($args)");
137 eval ("\$listclassobj = new SectionList([],\$ptArgs)");
138 }
139 }
140 else
141 {
142 $metaname = join('/',@metaname_list);
143
144 my @args;
145 push @args, ("-metadata", "$metaname");
146# buttonname is also used for the node's title
147 push @args, ("-buttonname", "$metavalue");
148 push @args, ("-doclevel", "$doclevel");
149 push @args, "-recopt";
150
151 # SECTIONFIX?
152 #eval ("\$listclassobj = new AZCompactList($args)");
153 my $ptArgs = \@args;
154 eval ("\$listclassobj = new AZCompactList([],\$ptArgs)");
155 }
156 if ($@) {
157 my $outhandle = $self->{'outhandle'};
158 print $outhandle "$@";
159 die "\n";
160 }
161
162 $listclassobj->init();
163
164 if (defined $metavalue && $metavalue =~ /\w/)
165 {
166 my $formatted_node = $metavalue;
167 if ($self->{'metadata'} =~ m/^Creator(:.*)?$/)
168 {
169 &sorttools::format_string_name_en(\$formatted_node);
170 }
171 else
172 {
173 &sorttools::format_string_en(\$formatted_node);
174 }
175
176 $self->{'classifiers'}->{$metavalue}
177 = { 'classifyobj' => $listclassobj,
178 'formattednode' => $formatted_node };
179 }
180 }
181 }
182
183
184 return (\@single_classlist,\@multiple_classlist);
185}
186
187
188sub reclassify
189{
190 my ($self,$multiple_cl_ref) = @_;
191
192 # Entries in the current classify list that are "book nodes"
193 # should be recursively classified.
194 #--
195 foreach my $dm_pair (@$multiple_cl_ref)
196 {
197 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
198 my $listclassobj;
199
200 # find metavalue in list of sub-classifiers
201 my $found = 0;
202 my $node_name;
203 foreach $node_name (keys %{$self->{'classifiers'}})
204 {
205 my $resafe_node_name = $node_name;
206 $resafe_node_name =~ s/(\(|\)|\[|\]|\{|\}|\^|\$|\.|\+|\*|\?|\|)/\\$1/g;
207 if ($metavalue =~ m/^$resafe_node_name$/i)
208 {
209 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
210
211 # SECTIONFIX? section must include multiple levels, e.g. '1.12'
212 #if ($doc_OID =~ m/^.*\.(\d+)$/)
213 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
214 {
215 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
216 # SECTIONFIX? classify can't handle multi-level section
217 #->classify($doc_obj, "Section=$1");
218 ->classify_section($1, $doc_obj, $sortmeta);
219 }
220 else
221 {
222 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
223 ->classify($doc_obj);
224 }
225
226 $found = 1;
227 last;
228 }
229 }
230
231 if (!$found)
232 {
233 my $outhandle = $self->{'outhandle'};
234 print $outhandle "Warning: AZCompactList::reclassify ";
235 print $outhandle "could not find sub-node for $metavalue with doc_OID $doc_OID\n";
236 }
237 }
238}
239
2401;
Note: See TracBrowser for help on using the repository browser.