source: trunk/gsdl/perllib/classify/AZCompactSectionList.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1###########################################################################
2#
3# AZCompactSectionList.pm --
4#
5# Experimental AZCompactList with fixes to handle section-level metadata
6#
7###########################################################################
8
9package AZCompactSectionList;
10
11use AZCompactList;
12
13sub BEGIN {
14 @ISA = ('AZCompactList');
15}
16
17my $options =
18{ 'name' => "AZCompactSectionList",
19 'desc' => "{AZCompactSectionList.desc}",
20 'abstract' => "no",
21 'inherits' => "yes",
22 'args' => $arguments };
23
24sub new {
25 my ($class) = shift (@_);
26 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
27 push(@$classifierslist, $class);
28
29 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
30 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
31
32 my $self = (defined $hashArgOptLists)? new AZCompactList($classifierslist,$inputargs,$hashArgOptLists): new AZCompactList($classifierslist,$inputargs);
33
34 return bless $self, $class;
35}
36
37#
38# override reinit() & reclassify() to demonstrate possible bug fixes
39# (search for SECTIONFIX? to see lines changed from AZCompactList.pm)
40#
41sub reinit
42{
43 my ($self,$classlist_ref) = @_;
44
45 my %mtfreq = ();
46 my @single_classlist = ();
47 my @multiple_classlist = ();
48
49 # find out how often each metavalue occurs
50 map
51 {
52 my $mv;
53 foreach $mv (@{$self->{'listmetavalue'}->{$_}} )
54 {
55 $mtfreq{$mv}++;
56 }
57 } @$classlist_ref;
58
59 # use this information to split the list: single metavalue/repeated value
60 map
61 {
62 my $i = 1;
63 my $metavalue;
64 foreach $metavalue (@{$self->{'listmetavalue'}->{$_}})
65 {
66 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
67 {
68 push(@multiple_classlist,[$_,$i,$metavalue]);
69 }
70 else
71 {
72 push(@single_classlist,[$_,$metavalue]);
73 $metavalue =~ tr/[A-Z]/[a-z]/;
74 $self->{'reclassifylist'}->{"Metavalue_$i.$_"} = $metavalue;
75 }
76 $i++;
77 }
78 } @$classlist_ref;
79
80
81 # Setup sub-classifiers for multiple list
82
83 $self->{'classifiers'} = {};
84
85 my $pm;
86 foreach $pm ("List", "SectionList")
87 {
88 my $listname
89 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/$pm.pm");
90 if (-e $listname) { require $listname; }
91 else
92 {
93 my $outhandle = $self->{'outhandle'};
94 print $outhandle "AZCompactList ERROR - couldn't find classifier \"$listname\"\n";
95 die "\n";
96 }
97 }
98
99 # Create classifiers objects for each entry >= mingroup
100 my $metavalue;
101 foreach $metavalue (keys %mtfreq)
102 {
103 if ($mtfreq{$metavalue} >= $self->{'mingroup'})
104 {
105 my $listclassobj;
106 my $doclevel = $self->{'doclevel'};
107 my $metaname = $self->{'metaname'};
108 my @metaname_list = split('/',$metaname);
109 $metaname = shift(@metaname_list);
110 if (@metaname_list==0)
111 {
112 my @args;
113 push @args, ("-metadata", "$metaname");
114# buttonname is also used for the node's title
115 push @args, ("-buttonname", "$metavalue");
116 push @args, ("-sort", $self->{'sort'});
117
118 my $ptArgs = \@args;
119 if ($doclevel =~ m/^top(level)?/i)
120 {
121 eval ("\$listclassobj = new List([],\$ptArgs)"); warn $@ if $@;
122 }
123 else
124 {
125 # SECTIONFIX?
126 #eval ("\$listclassobj = new SectionList($args)");
127 eval ("\$listclassobj = new SectionList([],\$ptArgs)");
128 }
129 }
130 else
131 {
132 $metaname = join('/',@metaname_list);
133
134 my @args;
135 push @args, ("-metadata", "$metaname");
136# buttonname is also used for the node's title
137 push @args, ("-buttonname", "$metavalue");
138 push @args, ("-doclevel", "$doclevel");
139 push @args, "-recopt";
140
141 # SECTIONFIX?
142 #eval ("\$listclassobj = new AZCompactList($args)");
143 my $ptArgs = \@args;
144 eval ("\$listclassobj = new AZCompactList([],\$ptArgs)");
145 }
146 if ($@) {
147 my $outhandle = $self->{'outhandle'};
148 print $outhandle "$@";
149 die "\n";
150 }
151
152 $listclassobj->init();
153
154 if (defined $metavalue && $metavalue =~ /\w/)
155 {
156 my $formatted_node = $metavalue;
157 if ($self->{'metaname'} =~ m/^Creator(:.*)?$/)
158 {
159 &sorttools::format_string_name_en(\$formatted_node);
160 }
161 else
162 {
163 &sorttools::format_string_en(\$formatted_node);
164 }
165
166 $self->{'classifiers'}->{$metavalue}
167 = { 'classifyobj' => $listclassobj,
168 'formattednode' => $formatted_node };
169 }
170 }
171 }
172
173
174 return (\@single_classlist,\@multiple_classlist);
175}
176
177
178sub reclassify
179{
180 my ($self,$multiple_cl_ref) = @_;
181
182 # Entries in the current classify list that are "book nodes"
183 # should be recursively classified.
184 #--
185 foreach $dm_pair (@$multiple_cl_ref)
186 {
187 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair;
188 my $listclassobj;
189
190 # find metavalue in list of sub-classifiers
191 my $found = 0;
192 my $node_name;
193 foreach $node_name (keys %{$self->{'classifiers'}})
194 {
195 $resafe_node_name = $node_name;
196 $resafe_node_name =~ s/(\(|\)|\[|\]|\{|\}|\^|\$|\.|\+|\*|\?|\|)/\\$1/g;
197 if ($metavalue =~ m/^$resafe_node_name$/i)
198 {
199 my ($doc_obj, $sortmeta) = @{$self->{'reclassify'}->{$doc_OID}};
200
201 # SECTIONFIX? section must include multiple levels, e.g. '1.12'
202 #if ($doc_OID =~ m/^.*\.(\d+)$/)
203 if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
204 {
205 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
206 # SECTIONFIX? classify can't handle multi-level section
207 #->classify($doc_obj, "Section=$1");
208 ->classify_section($1, $doc_obj, $sortmeta);
209 }
210 else
211 {
212 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
213 ->classify($doc_obj);
214 }
215
216 $found = 1;
217 last;
218 }
219 }
220
221 if (!$found)
222 {
223 my $outhandle = $self->{'outhandle'};
224 print $outhandle "Warning: AZCompactList::reclassify ";
225 print $outhandle "could not find sub-node for $metavalue with doc_OID $doc_OID\n";
226 }
227 }
228}
229
2301;
Note: See TracBrowser for help on using the repository browser.