source: trunk/gsdl/perllib/classify/AutoHierarchy.pm@ 9133

Last change on this file since 9133 was 9133, checked in by mdewsnip, 19 years ago

Added the "-hlist_at_top" option from Hierarchy.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.7 KB
Line 
1###########################################################################
2#
3# AutoHierarchy.pm --classifier to create a Hierarchy without the need for a
4# hierarchy file (like Hierarchy)
5# created by Imene, modified by Katherine
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# simple Hierarchical classifier plugin
29# to see the options, run "perl -S classinfo.pl AutoHierarchy"
30
31package AutoHierarchy;
32
33use BasClas;
34use sorttools;
35
36sub BEGIN {
37 @ISA = ('BasClas');
38}
39
40my $arguments =
41 [ { 'name' => "metadata",
42 'desc' => "{AZCompactList.metadata}",
43 'type' => "metadata",
44 'reqd' => "yes" },
45 { 'name' => "firstvalueonly",
46 'desc' => "{AZCompactList.firstvalueonly}",
47 'type' => "flag",
48 'reqd' => "no" },
49 { 'name' => "allvalues",
50 'desc' => "{AZCompactList.allvalues}",
51 'type' => "flag",
52 'reqd' => "no" },
53 { 'name' => "buttonname",
54 'desc' => "{BasClas.buttonname}",
55 'type' => "string",
56 'deft' => "{BasClas.metadata.deft}",
57 'reqd' => "no" },
58 { 'name' => "sort",
59 'desc' => "{Hierarchy.sort}",
60 'type' => "string",
61 'deft' => "{BasClas.metadata.deft}",
62 'reqd' => "no" },
63 { 'name' => "separator",
64 'desc' => "{AutoHierarchy.separator}",
65 'type' => "regexp",
66 'deft' => "[\\\\\\\/|\\\\\\\|]",
67 'reqd' => "no" },
68 { 'name' => "suppresslastlevel",
69 'desc' => "{AutoHierarchy.suppresslastlevel}",
70 'type' => "flag",
71 'reqd' => "no" },
72 { 'name' => "hlist_at_top",
73 'desc' => "{Hierarchy.hlist_at_top}",
74 'type' => "flag",
75 'reqd' => "no" } ];
76
77my $options = { 'name' => "AutoHierarchy",
78 'desc' => "{AutoHierarchy.desc}",
79 'abstract' => "no",
80 'inherits' => "yes",
81 'args' => $arguments };
82
83
84sub new {
85 my $class = shift (@_);
86 my $self = new BasClas($class, @_);
87
88 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
89 my $option_list = $self->{'option_list'};
90 push( @{$option_list}, $options );
91
92 if ($self->{'info_only'}) {
93 # created from classinfo.pl - don't need to parse the arguments
94 return bless $self, $class;
95 }
96
97 my ($metadata, $buttonname, $sortname, $separator);
98 my $firstvalueonly = 0;
99 my $allvalues = 0;
100 my $suppresslastlevel = 0;
101
102 if (!parsargv::parse(\@_,
103 q^metadata/.*/^, \$metadata,
104 q^buttonname/.*/^, \$buttonname,
105 q^sort/.*/^, \$sortname,
106 q^separator/.*/^, \$separator,
107 q^firstvalueonly^, \$firstvalueonly,
108 q^allvalues^, \$allvalues,
109 q^suppresslastlevel^, \$suppresslastlevel,
110 q^hlist_at_top^, \$hlist_at_top,
111 "allow_extra_options")) {
112
113 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
114 $self->print_txt_usage(""); # Use default resource bundle
115 die "\n";
116 }
117
118 if (!$metadata) {
119 print STDERR "$class Error: required option -metadata not supplied\n";
120 $self->print_txt_usage(""); # Use default resource bundle
121
122 die "$class Error: required option -metadata not supplied\n";
123 }
124 my @meta_list = split(/,/, $metadata);
125 $self->{'meta_list'} = \@meta_list;
126
127 if (!$separator) {
128 $separator = "[\\\/|\\\|]";
129 }
130 $self->{'separator'} = $separator;
131
132 $buttonname = $self->generate_title_from_metadata($metadata) unless ($buttonname);
133 $self->{'title'} = $buttonname;
134 $self->{'firstvalueonly'} = $firstvalueonly;
135 $self->{'allvalues'} = $allvalues;
136 $self->{'suppresslastlevel'} = $suppresslastlevel;
137 $self->{'hlist_at_top'} = $hlist_at_top;
138
139 # sortname is handled a bit differently - kjdon
140 # undef means to sort, but use the metadata value from -metadata
141 # because there is no one metadata value to get for sorting when
142 # we have a list of possible metadata
143 # to get no sorting, set sortname = 'nosort'
144 if (!$sortname) {
145 if (defined ($metadata)) {
146 $sortname = undef;
147 } else {
148 $sortname = "nosort";
149 }
150 }
151 $self->{'sortname'} = $sortname;
152
153 # the hash that we use to build up the hierarchy
154 $self->{'path_hash'}={};
155
156 return bless $self, $class;
157}
158
159sub init {
160 my $self = shift (@_);
161
162}
163
164sub classify {
165 my $self = shift (@_);
166 my ($doc_obj) = @_;
167
168 my $doc_OID = $doc_obj->get_OID();
169
170 # are we sorting the list??
171 my $nosort = 0;
172 if (defined $self->{'sortname'} && $self->{'sortname'} eq "nosort") {
173 $nosort = 1;
174 }
175
176 my $metavalues = [];
177 # find all the metadata values
178 foreach $m (@{$self->{'meta_list'}}) {
179 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
180 next unless (@{$mvalues});
181 if ($self->{'onlyfirst'}) {
182 # we only want the first metadata value
183 push (@$metavalues, $mvalues[0]);
184 last;
185 }
186 push (@$metavalues, @$mvalues);
187 last if (!$self->{'allvalues'}); # we don't want to try other elements
188 # cos we have already found some
189 }
190
191 return unless (@$metavalues);
192
193 #check for a sort element other than our metadata
194 my $sortmeta = undef;
195 if (!$nosort && defined $self->{'sortname'}) {
196
197 if ($self->{'sortname'} =~ /^filename$/i) {
198 $sortmeta = $doc_obj->get_source_filename();
199 } else {
200 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sortname'});
201 if (defined $sortmeta) {
202 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sortname'}, $sortmeta, $doc_obj);
203 }
204 }
205 $sortmeta = "" unless defined $sortmeta;
206 }
207
208
209 #Add all the metadata values to the hash
210 my $path_hash;
211 my $current_pos;
212
213 foreach $metavalue (@$metavalues) {
214 $path_hash = $self->{'path_hash'};
215 my @chunks = split (/$self->{'separator'}/, $metavalue);
216 if ($self->{'suppresslastlevel'}) {
217 pop(@chunks); # remove the last element from the end
218 }
219
220 foreach $folderName (@chunks)
221 {
222 if ($folderName ne ""){ #sometimes the tokens are empty
223 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
224 $path_hash = $current_pos->{'nodes'};
225 }
226 }
227 # now add the document, with sort meta if needed
228 if ($nosort) {
229 push(@{$current_pos->{'docs'}}, $doc_OID);
230 } else {
231 if (defined $sortmeta) {
232 # can you ever get the same doc twice in one classification??
233 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
234 } else {
235 $current_pos->{'docs'}->{$doc_OID} = $metavalue;
236 }
237 }
238 } # foreach metadata
239}
240
241sub add_To_Hash {
242 my $self = shift (@_);
243 my ($myhash, $k, $nosort) = @_;
244
245 if (!defined $myhash->{$k}){
246 $myhash->{$k}={};
247 $myhash->{$k}->{'nodes'}={};
248 if ($nosort) {
249 $myhash->{$k}->{'docs'}=[];
250 } else {
251 $myhash->{$k}->{'docs'} = {};
252 }
253 }
254 return $myhash->{$k};
255}
256
257sub print_Hash{
258 my $self = shift (@_);
259 my ($myHash, $num_spaces) = @_;
260
261 foreach my $key (keys %{$myHash}){
262 print "\n";
263 $self->print_spaces($num_spaces);
264 print STDERR "$key*";
265 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
266 }
267}
268
269sub print_spaces{
270 my $self = shift (@_);
271 my ($num_spaces) = @_;
272
273 for ($i = 0; $i < $num_spaces; $i++){
274 print STDERR " ";
275 }
276}
277
278sub get_entry {
279 my $self = shift (@_);
280 my ($title, $childtype, $thistype) = @_;
281
282 # organise into classification structure
283 my %classifyinfo = ('childtype'=>$childtype,
284 'Title'=>$title,
285 'contains'=>[],);
286 $classifyinfo{'thistype'} = $thistype
287 if defined $thistype && $thistype =~ /\w/;
288
289 return \%classifyinfo;
290}
291
292sub process_hash {
293 my $self = shift (@_);
294 my ($top_hash, $top_entry) = @_;
295 my ($entry);
296
297 my $hash = {};
298 foreach my $key (sort keys %{$top_hash}) {
299 $entry = $self->get_entry($key,"VList","VList");
300 my $has_content = 0;
301 my @doc_list;
302 # generate a sorted list of doc ids
303 if ($nosort && scalar(@{$top_hash->{$key}->{'docs'}})) {
304 @doc_list = @{$top_hash->{$key}->{'docs'}};
305 } elsif (!$nosort && (keys %{$top_hash->{$key}->{'docs'}})) {
306 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
307 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
308
309 }
310 # if this key has documents, add them
311 if (@doc_list) {
312 $has_content = 1;
313 foreach $d(@doc_list) {
314 push (@{$entry->{'contains'}}, {'OID'=>$d});
315 }
316 }
317 # if this key has nodes, add them
318 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
319 $has_content = 1;
320 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
321 }
322 # if we have found some content, add the new entry for this key into the parent node
323 if ($has_content) {
324 push (@{$top_entry->{'contains'}}, $entry);
325 }
326
327 }
328}
329
330sub get_classify_info {
331 my $self = shift (@_);
332 my ($no_thistype) = @_;
333 $no_thistype = 0 unless defined $no_thistype;
334
335 my ($classification);
336 my $top_h = $self->{'path_hash'};
337
338 if ($self->{'path_hash'}) {
339 if ($self->{'hlist_at_top'}) {
340 $classification = $self->get_entry ($self->{'title'}, "HList", "Invisible");
341 }
342 else {
343 $classification = $self->get_entry ($self->{'title'}, "VList", "Invisible");
344 }
345 }
346
347 $self->process_hash($top_h, $classification);
348
349 return $classification;
350
351}
352
353
3541;
355
356
357
Note: See TracBrowser for help on using the repository browser.