source: trunk/gsdl/perllib/classify/AutoHierarchy.pm@ 9130

Last change on this file since 9130 was 9130, checked in by mdewsnip, 19 years ago

Allowed the use of "|" characters as well as "\" for separating levels, to work with the GLI. Also finished off the "-suppresslastlevel" option, which someone whose name begins with Katherine had forgotten to do.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.6 KB
Line 
1###########################################################################
2#
3# AutoHierarchy.pm --classifier to create a Hierarchy without the need for a
4# hierarchy file (like Hierarchy)
5# created by Imene, modified by Katherine
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# simple Hierarchical classifier plugin
29# to see the options, run "perl -S classinfo.pl AutoHierarchy"
30
31package AutoHierarchy;
32
33use BasClas;
34use sorttools;
35
36sub BEGIN {
37 @ISA = ('BasClas');
38}
39
40my $arguments =
41 [ { 'name' => "metadata",
42 'desc' => "{AZCompactList.metadata}",
43 'type' => "metadata",
44 'reqd' => "yes" },
45 { 'name' => "firstvalueonly",
46 'desc' => "{AZCompactList.firstvalueonly}",
47 'type' => "flag",
48 'reqd' => "no" },
49 { 'name' => "allvalues",
50 'desc' => "{AZCompactList.allvalues}",
51 'type' => "flag",
52 'reqd' => "no" },
53 { 'name' => "buttonname",
54 'desc' => "{BasClas.buttonname}",
55 'type' => "string",
56 'deft' => "{BasClas.metadata.deft}",
57 'reqd' => "no" },
58 { 'name' => "sort",
59 'desc' => "{Hierarchy.sort}",
60 'type' => "string",
61 'deft' => "{BasClas.metadata.deft}",
62 'reqd' => "no" },
63 { 'name' => "separator",
64 'desc' => "{AutoHierarchy.separator}",
65 'type' => "regexp",
66 'deft' => "[\\\\\\\/|\\\\\\\|]",
67 'reqd' => "no" },
68 { 'name' => "suppresslastlevel",
69 'desc' => "{AutoHierarchy.suppresslastlevel}",
70 'type' => "flag",
71 'reqd' => "no" }
72 ];
73
74my $options = { 'name' => "AutoHierarchy",
75 'desc' => "{AutoHierarchy.desc}",
76 'abstract' => "no",
77 'inherits' => "yes",
78 'args' => $arguments };
79
80
81sub new {
82 my $class = shift (@_);
83 my $self = new BasClas($class, @_);
84
85 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
86 my $option_list = $self->{'option_list'};
87 push( @{$option_list}, $options );
88
89 if ($self->{'info_only'}) {
90 # created from classinfo.pl - don't need to parse the arguments
91 return bless $self, $class;
92 }
93
94 my ($metadata, $buttonname, $sortname, $separator);
95 my $firstvalueonly = 0;
96 my $allvalues = 0;
97 my $suppresslastlevel = 0;
98
99 if (!parsargv::parse(\@_,
100 q^metadata/.*/^, \$metadata,
101 q^buttonname/.*/^, \$buttonname,
102 q^sort/.*/^, \$sortname,
103 q^separator/.*/^, \$separator,
104 q^firstvalueonly^, \$firstvalueonly,
105 q^allvalues^, \$allvalues,
106 q^suppresslastlevel^, \$suppresslastlevel,
107 "allow_extra_options")) {
108
109 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
110 $self->print_txt_usage(""); # Use default resource bundle
111 die "\n";
112 }
113
114 if (!$metadata) {
115 print STDERR "$class Error: required option -metadata not supplied\n";
116 $self->print_txt_usage(""); # Use default resource bundle
117
118 die "$class Error: required option -metadata not supplied\n";
119 }
120 my @meta_list = split(/,/, $metadata);
121 $self->{'meta_list'} = \@meta_list;
122
123 if (!$separator) {
124 $separator = "[\\\/|\\\|]";
125 }
126 $self->{'separator'} = $separator;
127
128 $buttonname = $self->generate_title_from_metadata($metadata) unless ($buttonname);
129 $self->{'title'} = $buttonname;
130
131 if ($firstvalueonly != 0) {
132 $firstvalueonly = 1;
133 }
134 $self->{'firstvalueonly'} = $firstvalueonly;
135
136 if ($allvalues != 0) {
137 $allvalues = 1;
138 }
139 $self->{'allvalues'} = $allvalues;
140
141 if ($suppresslastlevel != 0) {
142 $suppresslastlevel = 1;
143 }
144 $self->{'suppresslastlevel'} = $suppresslastlevel;
145
146 # sortname is handled a bit differently - kjdon
147 # undef means to sort, but use the metadata value from -metadata
148 # because there is no one metadata value to get for sorting when
149 # we have a list of possible metadata
150 # to get no sorting, set sortname = 'nosort'
151 if (!$sortname) {
152 if (defined ($metadata)) {
153 $sortname = undef;
154 } else {
155 $sortname = "nosort";
156 }
157 }
158 $self->{'sortname'} = $sortname;
159
160 # the hash that we use to build up the hierarchy
161 $self->{'path_hash'}={};
162
163 return bless $self, $class;
164}
165
166sub init {
167 my $self = shift (@_);
168
169}
170
171sub classify {
172 my $self = shift (@_);
173 my ($doc_obj) = @_;
174
175 my $doc_OID = $doc_obj->get_OID();
176
177 # are we sorting the list??
178 my $nosort = 0;
179 if (defined $self->{'sortname'} && $self->{'sortname'} eq "nosort") {
180 $nosort = 1;
181 }
182
183 my $metavalues = [];
184 # find all the metadata values
185 foreach $m (@{$self->{'meta_list'}}) {
186 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
187 next unless (@{$mvalues});
188 if ($self->{'onlyfirst'}) {
189 # we only want the first metadata value
190 push (@$metavalues, $mvalues[0]);
191 last;
192 }
193 push (@$metavalues, @$mvalues);
194 last if (!$self->{'allvalues'}); # we don't want to try other elements
195 # cos we have already found some
196 }
197
198 return unless (@$metavalues);
199
200 #check for a sort element other than our metadata
201 my $sortmeta = undef;
202 if (!$nosort && defined $self->{'sortname'}) {
203
204 if ($self->{'sortname'} =~ /^filename$/i) {
205 $sortmeta = $doc_obj->get_source_filename();
206 } else {
207 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sortname'});
208 if (defined $sortmeta) {
209 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sortname'}, $sortmeta, $doc_obj);
210 }
211 }
212 $sortmeta = "" unless defined $sortmeta;
213 }
214
215
216 #Add all the metadata values to the hash
217 my $path_hash;
218 my $current_pos;
219
220 foreach $metavalue (@$metavalues) {
221 $path_hash = $self->{'path_hash'};
222 my @chunks = split (/$self->{'separator'}/, $metavalue);
223 if ($self->{'suppresslastlevel'}) {
224 pop(@chunks); # remove the last element from the end
225 }
226
227 foreach $folderName (@chunks)
228 {
229 if ($folderName ne ""){ #sometimes the tokens are empty
230 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
231 $path_hash = $current_pos->{'nodes'};
232 }
233 }
234 # now add the document, with sort meta if needed
235 if ($nosort) {
236 push(@{$current_pos->{'docs'}}, $doc_OID);
237 } else {
238 if (defined $sortmeta) {
239 # can you ever get the same doc twice in one classification??
240 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
241 } else {
242 $current_pos->{'docs'}->{$doc_OID} = $metavalue;
243 }
244 }
245 } # foreach metadata
246}
247
248sub add_To_Hash {
249 my $self = shift (@_);
250 my ($myhash, $k, $nosort) = @_;
251
252 if (!defined $myhash->{$k}){
253 $myhash->{$k}={};
254 $myhash->{$k}->{'nodes'}={};
255 if ($nosort) {
256 $myhash->{$k}->{'docs'}=[];
257 } else {
258 $myhash->{$k}->{'docs'} = {};
259 }
260 }
261 return $myhash->{$k};
262}
263
264sub print_Hash{
265 my $self = shift (@_);
266 my ($myHash, $num_spaces) = @_;
267
268 foreach my $key (keys %{$myHash}){
269 print "\n";
270 $self->print_spaces($num_spaces);
271 print STDERR "$key*";
272 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
273 }
274}
275
276sub print_spaces{
277 my $self = shift (@_);
278 my ($num_spaces) = @_;
279
280 for ($i = 0; $i < $num_spaces; $i++){
281 print STDERR " ";
282 }
283}
284
285sub get_entry {
286 my $self = shift (@_);
287 my ($title, $childtype, $thistype) = @_;
288
289 # organise into classification structure
290 my %classifyinfo = ('childtype'=>$childtype,
291 'Title'=>$title,
292 'contains'=>[],);
293 $classifyinfo{'thistype'} = $thistype
294 if defined $thistype && $thistype =~ /\w/;
295
296 return \%classifyinfo;
297}
298
299sub process_hash {
300 my $self = shift (@_);
301 my ($top_hash, $top_entry) = @_;
302 my ($entry);
303
304 my $hash = {};
305 foreach my $key (sort keys %{$top_hash}) {
306 $entry = $self->get_entry($key,"VList","VList");
307 my $has_content = 0;
308 my @doc_list;
309 # generate a sorted list of doc ids
310 if ($nosort && scalar(@{$top_hash->{$key}->{'docs'}})) {
311 @doc_list = @{$top_hash->{$key}->{'docs'}};
312 } elsif (!$nosort && (keys %{$top_hash->{$key}->{'docs'}})) {
313 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
314 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
315
316 }
317 # if this key has documents, add them
318 if (@doc_list) {
319 $has_content = 1;
320 foreach $d(@doc_list) {
321 push (@{$entry->{'contains'}}, {'OID'=>$d});
322 }
323 }
324 # if this key has nodes, add them
325 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
326 $has_content = 1;
327 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
328 }
329 # if we have found some content, add the new entry for this key into the parent node
330 if ($has_content) {
331 push (@{$top_entry->{'contains'}}, $entry);
332 }
333
334 }
335}
336
337sub get_classify_info {
338 my $self = shift (@_);
339 my ($no_thistype) = @_;
340 $no_thistype = 0 unless defined $no_thistype;
341
342 my ($classification);
343 my $top_h = $self->{'path_hash'};
344
345 if ($self->{'path_hash'}) {
346 $classification = $self->get_entry ($self->{'title'}, "VList", "Invisible");
347 }
348
349 $self->process_hash($top_h, $classification);
350
351 return $classification;
352
353}
354
355
3561;
357
358
359
Note: See TracBrowser for help on using the repository browser.