source: trunk/gsdl/perllib/classify/AutoHierarchy.pm@ 8728

Last change on this file since 8728 was 8479, checked in by kjdon, 20 years ago

fixed a typo in the arg list which meant it didn't work with -xml

  • Property svn:keywords set to Author Date Id Revision
File size: 9.4 KB
Line 
1###########################################################################
2#
3# AutoHierarchy.pm --classifier to create a Hierarchy without the need for a
4# hierarchy file (like Hierarchy)
5# created by Imene, modified by Katherine
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# simple Hierarchical classifier plugin
29# to see the options, run "perl -S classinfo.pl AutoHierarchy"
30
31package AutoHierarchy;
32
33use BasClas;
34use sorttools;
35
36sub BEGIN {
37 @ISA = ('BasClas');
38}
39
40my $arguments =
41 [ { 'name' => "metadata",
42 'desc' => "{AZCompactList.metadata}",
43 'type' => "metadata",
44 'reqd' => "yes" },
45 { 'name' => "firstvalueonly",
46 'desc' => "{AZCompactList.firstvalueonly}",
47 'type' => "flag",
48 'reqd' => "no" },
49 { 'name' => "allvalues",
50 'desc' => "{AZCompactList.allvalues}",
51 'type' => "flag",
52 'reqd' => "no" },
53 { 'name' => "buttonname",
54 'desc' => "{BasClas.buttonname}",
55 'type' => "string",
56 'deft' => "{BasClas.metadata.deft}",
57 'reqd' => "no" },
58 { 'name' => "sort",
59 'desc' => "{Hierarchy.sort}",
60 'type' => "string",
61 'deft' => "{BasClas.metadata.deft}",
62 'reqd' => "no" },
63 { 'name' => "separator",
64 'desc' => "{AutoHierarchy.separator}",
65 'type' => "regexp",
66 'deft' => "[\\\\\\\/]",
67 'reqd' => "no" },
68 { 'name' => "suppresslastlevel",
69 'desc' => "{AutoHierarchy.suppresslastlevel}",
70 'type' => "flag",
71 'reqd' => "no" }
72 ];
73
74my $options = { 'name' => "AutoHierarchy",
75 'desc' => "{AutoHierarchy.desc}",
76 'abstract' => "no",
77 'inherits' => "yes",
78 'args' => $arguments };
79
80
81sub new {
82 my $class = shift (@_);
83 my $self = new BasClas($class, @_);
84
85 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
86 my $option_list = $self->{'option_list'};
87 push( @{$option_list}, $options );
88
89 if ($self->{'info_only'}) {
90 # created from classinfo.pl - don't need to parse the arguments
91 return bless $self, $class;
92 }
93
94 my ($metadata, $buttonname, $sortname, $separator);
95 my $firstvalueonly = 0;
96 my $allvalues = 0;
97
98 if (!parsargv::parse(\@_,
99 q^metadata/.*/^, \$metadata,
100 q^buttonname/.*/^, \$buttonname,
101 q^sort/.*/^, \$sortname,
102 q^separator/.*/^, \$separator,
103 q^firstvalueonly^, \$firstvalueonly,
104 q^allvalues^, \$allvalues,
105
106 "allow_extra_options")) {
107
108 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
109 $self->print_txt_usage(""); # Use default resource bundle
110 die "\n";
111 }
112
113 if (!$metadata) {
114 print STDERR "$class Error: required option -metadata not supplied\n";
115 $self->print_txt_usage(""); # Use default resource bundle
116
117 die "$class Error: required option -metadata not supplied\n";
118 }
119 my @meta_list = split(/,/, $metadata);
120 $self->{'meta_list'} = \@meta_list;
121
122 if (!$separator) {
123 $separator = "[\\\/]";
124 }
125 $self->{'separator'} = $separator;
126
127 $buttonname = $self->generate_title_from_metadata($metadata) unless ($buttonname);
128 $self->{'title'} = $buttonname;
129
130 if ($firstvalueonly != 0) {
131 $firstvalueonly = 1;
132 }
133 $self->{'firstvalueonly'} = $firstvalueonly;
134
135 if ($allvalues != 0) {
136 $allvalues = 1;
137 }
138 $self->{'allvalues'} = $allvalues;
139
140 $self->{'uniquelastbit'} = 1;
141 # sortname is handled a bit differently - kjdon
142 # undef means to sort, but use the metadata value from -metadata
143 # because there is no one metadata value to get for sorting when
144 # we have a list of possible metadata
145 # to get no sorting, set sortname = 'nosort'
146 if (!$sortname) {
147 if (defined ($metadata)) {
148 $sortname = undef;
149 } else {
150 $sortname = "nosort";
151 }
152 }
153 $self->{'sortname'} = $sortname;
154
155 # the hash that we use to build up the hierarchy
156 $self->{'path_hash'}={};
157
158 return bless $self, $class;
159}
160
161sub init {
162 my $self = shift (@_);
163
164}
165
166sub classify {
167 my $self = shift (@_);
168 my ($doc_obj) = @_;
169
170 my $doc_OID = $doc_obj->get_OID();
171
172 # are we sorting the list??
173 my $nosort = 0;
174 if (defined $self->{'sortname'} && $self->{'sortname'} eq "nosort") {
175 $nosort = 1;
176 }
177
178 my $metavalues = [];
179 # find all the metadata values
180 foreach $m (@{$self->{'meta_list'}}) {
181 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
182 next unless (@{$mvalues});
183 if ($self->{'onlyfirst'}) {
184 # we only want the first metadata value
185 push (@$metavalues, $mvalues[0]);
186 last;
187 }
188 push (@$metavalues, @$mvalues);
189 last if (!$self->{'allvalues'}); # we don't want to try other elements
190 # cos we have already found some
191 }
192
193 return unless (@$metavalues);
194
195 #check for a sort element other than our metadata
196 my $sortmeta = undef;
197 if (!$nosort && defined $self->{'sortname'}) {
198
199 if ($self->{'sortname'} =~ /^filename$/i) {
200 $sortmeta = $doc_obj->get_source_filename();
201 } else {
202 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sortname'});
203 if (defined $sortmeta) {
204 $sortmeta = $self->format_metadata_for_sorting($self->{'sortname'}, $sortmeta, $doc_obj);
205 }
206 }
207 $sortmeta = "" unless defined $sortmeta;
208 }
209
210
211 #Add all the metadata values to the hash
212 my $path_hash;
213 my $current_pos;
214
215 foreach $metavalue (@$metavalues) {
216 $path_hash = $self->{'path_hash'};
217 my @chunks = split (/$self->{'separator'}/, $metavalue);
218 if ($self->{'uniquelastbit'}) {
219 pop(@chunks); # remove the last element from the end
220 }
221
222 foreach $folderName (@chunks)
223 {
224 if ($folderName ne ""){ #sometimes the tokens are empty
225 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
226 $path_hash = $current_pos->{'nodes'};
227 }
228 }
229 # now add the document, with sort meta if needed
230 if ($nosort) {
231 push(@{$current_pos->{'docs'}}, $doc_OID);
232 } else {
233 if (defined $sortmeta) {
234 # can you ever get the same doc twice in one classification??
235 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
236 } else {
237 $current_pos->{'docs'}->{$doc_OID} = $metavalue;
238 }
239 }
240 } # foreach metadata
241}
242
243sub add_To_Hash {
244 my $self = shift (@_);
245 my ($myhash, $k, $nosort) = @_;
246
247 if (!defined $myhash->{$k}){
248 $myhash->{$k}={};
249 $myhash->{$k}->{'nodes'}={};
250 if ($nosort) {
251 $myhash->{$k}->{'docs'}=[];
252 } else {
253 $myhash->{$k}->{'docs'} = {};
254 }
255 }
256 return $myhash->{$k};
257}
258
259sub print_Hash{
260 my $self = shift (@_);
261 my ($myHash, $num_spaces) = @_;
262
263 foreach my $key (keys %{$myHash}){
264 print "\n";
265 $self->print_spaces($num_spaces);
266 print STDERR "$key*";
267 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
268 }
269}
270
271sub print_spaces{
272 my $self = shift (@_);
273 my ($num_spaces) = @_;
274
275 for ($i = 0; $i < $num_spaces; $i++){
276 print STDERR " ";
277 }
278}
279
280sub get_entry {
281 my $self = shift (@_);
282 my ($title, $childtype, $thistype) = @_;
283
284 # organise into classification structure
285 my %classifyinfo = ('childtype'=>$childtype,
286 'Title'=>$title,
287 'contains'=>[],);
288 $classifyinfo{'thistype'} = $thistype
289 if defined $thistype && $thistype =~ /\w/;
290
291 return \%classifyinfo;
292}
293
294sub process_hash {
295 my $self = shift (@_);
296 my ($top_hash, $top_entry) = @_;
297 my ($entry);
298
299 my $hash = {};
300 foreach my $key (sort keys %{$top_hash}) {
301 $entry = $self->get_entry($key,"VList","VList");
302 my $has_content = 0;
303 my @doc_list;
304 # generate a sorted list of doc ids
305 if ($nosort && scalar(@{$top_hash->{$key}->{'docs'}})) {
306 @doc_list = @{$top_hash->{$key}->{'docs'}};
307 } elsif (!$nosort && (keys %{$top_hash->{$key}->{'docs'}})) {
308 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
309 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
310
311 }
312 # if this key has documents, add them
313 if (@doc_list) {
314 $has_content = 1;
315 foreach $d(@doc_list) {
316 push (@{$entry->{'contains'}}, {'OID'=>$d});
317 }
318 }
319 # if this key has nodes, add them
320 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
321 $has_content = 1;
322 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
323 }
324 # if we have found some content, add the new entry for this key into the parent node
325 if ($has_content) {
326 push (@{$top_entry->{'contains'}}, $entry);
327 }
328
329 }
330}
331
332sub get_classify_info {
333 my $self = shift (@_);
334 my ($no_thistype) = @_;
335 $no_thistype = 0 unless defined $no_thistype;
336
337 my ($classification);
338 my $top_h = $self->{'path_hash'};
339
340 if ($self->{'path_hash'}) {
341 $classification = $self->get_entry ($self->{'title'}, "VList", "Invisible");
342 }
343
344 $self->process_hash($top_h, $classification);
345
346 return $classification;
347
348}
349
350
3511;
352
353
354
Note: See TracBrowser for help on using the repository browser.