source: gsdl/tags/greenstone-3_01-distribution/gsdl/perllib/classify/Hierarchy.pm@ 18510

Last change on this file since 18510 was 10663, checked in by kjdon, 19 years ago

added documents_last - display document nodes after classifier nodes in the lists

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.3 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm -- classifier that enables a Hierarchy to beformed without
4# the need for a hierarchy file (like HFileHierarchy). Used
5# to be called AutoHierarchy. Inherits from HFileHierarchy
6# so can also do everything that does as well.
7# Created by Imene, modified by Katherine and David.
8#
9# A component of the Greenstone digital library software
10# from the New Zealand Digital Library Project at the
11# University of Waikato, New Zealand.
12#
13# Copyright (C) 1999 New Zealand Digital Library Project
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program; if not, write to the Free Software
27# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28#
29###########################################################################
30
31# An advanced Hierarchical classifier
32# to see the options, run "perl -S classinfo.pl Hierarchy"
33
34package Hierarchy;
35
36use HFileHierarchy;
37use sorttools;
38
39use strict;
40no strict 'refs'; # allow filehandles to be variables and viceversa
41
42sub BEGIN {
43 @Hierarchy::ISA = ('HFileHierarchy');
44}
45
46my $arguments =
47 [ { 'name' => "separator",
48 'desc' => "{Hierarchy.separator}",
49 'type' => "regexp",
50 'deft' => "[\\\\\\\/|\\\\\\\|]",
51 'reqd' => "no" },
52 { 'name' => "suppresslastlevel",
53 'desc' => "{Hierarchy.suppresslastlevel}",
54 'type' => "flag",
55 'reqd' => "no" },
56 { 'name' => "suppressfirstlevel",
57 'desc' => "{Hierarchy.suppressfirstlevel}",
58 'type' => "flag",
59 'reqd' => "no" }
60 ];
61
62my $options = { 'name' => "Hierarchy",
63 'desc' => "{Hierarchy.desc}",
64 'abstract' => "no",
65 'inherits' => "yes",
66 'args' => $arguments };
67
68
69sub new {
70 my ($class) = shift (@_);
71 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
72 push(@$classifierslist, $class);
73
74 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
75 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
76
77 my $self = (defined $hashArgOptLists)? new HFileHierarchy($classifierslist,$inputargs,$hashArgOptLists): new HFileHierarchy($classifierslist,$inputargs);
78
79 # the hash that we use to build up the hierarchy
80 $self->{'path_hash'}= {};
81
82 return bless $self, $class;
83}
84
85
86sub auto_classify {
87 my $self = shift (@_);
88 my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_;
89
90 my $doc_OID = $doc_obj->get_OID();
91
92 #Add all the metadata values to the hash
93 my $path_hash;
94 my $current_pos;
95
96
97 foreach my $metavalue (@$metavalues) {
98 $path_hash = $self->{'path_hash'};
99 my @chunks = split (/$self->{'separator'}/, $metavalue);
100 if ($self->{'suppresslastlevel'}) {
101 pop(@chunks); # remove the last element from the end
102 }
103 if ($self->{'suppressfirstlevel'}) {
104 shift(@chunks);
105 }
106 foreach my $folderName (@chunks)
107 {
108 if ($folderName ne ""){ #sometimes the tokens are empty
109 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
110 $path_hash = $current_pos->{'nodes'};
111 }
112 }
113 # now add the document, with sort meta if needed
114 if ($nosort) {
115 push(@{$current_pos->{'docs'}}, $doc_OID);
116 } else {
117 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
118
119 #if (defined $sortmeta) {
120 # # can you ever get the same doc twice in one classification??
121 # $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
122 # } else {
123 # $current_pos->{'docs'}->{$doc_OID} = $metavalue;
124 # }
125 }
126 } # foreach metadata
127
128}
129
130sub classify {
131 my $self = shift (@_);
132 my ($doc_obj) = @_;
133
134 my $doc_OID = $doc_obj->get_OID();
135
136 # are we sorting the list??
137 my $nosort = 0;
138 if (!defined $self->{'sort'}) {
139 $nosort = 1;
140 }
141
142 my $metavalues = [];
143 # find all the metadata values
144 foreach my $m (@{$self->{'meta_list'}}) {
145 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
146 next unless (@{$mvalues});
147 if ($self->{'firstvalueonly'}) {
148 # we only want the first metadata value
149 push (@$metavalues, $mvalues->[0]);
150 last;
151 }
152 push (@$metavalues, @$mvalues);
153 last if (!$self->{'allvalues'}); # we don't want to try other elements
154 # cos we have already found some
155 }
156
157 return unless (@$metavalues);
158
159 #check for a sort element other than our metadata
160 my $sortmeta = undef;
161 if (!$nosort) {
162 if ($self->{'sort'} =~ /^filename$/i) {
163 $sortmeta = $doc_obj->get_source_filename();
164 } else {
165 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sort'});
166 if (defined $sortmeta && !$self->{'no_metadata_formatting'}) {
167 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sortmeta, $doc_obj);
168 }
169 }
170 $sortmeta = "" unless defined $sortmeta;
171 }
172
173 if (defined $self->{'subjectfile'}) {
174 $self->hfile_classify($doc_obj,$sortmeta,$metavalues);
175 }
176 else {
177 $self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues);
178 }
179}
180
181sub add_To_Hash {
182 my $self = shift (@_);
183 my ($myhash, $k, $nosort) = @_;
184
185 if (!defined $myhash->{$k}){
186 $myhash->{$k}={};
187 $myhash->{$k}->{'nodes'}={};
188 if ($nosort) {
189 $myhash->{$k}->{'docs'}=[];
190 } else {
191 $myhash->{$k}->{'docs'} = {};
192 }
193 }
194 return $myhash->{$k};
195}
196
197sub print_Hash{
198 my $self = shift (@_);
199 my ($myHash, $num_spaces) = @_;
200
201 foreach my $key (keys %{$myHash}){
202 print "\n";
203 $self->print_spaces($num_spaces);
204 print STDERR "$key*";
205 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
206 }
207}
208
209sub print_spaces{
210 my $self = shift (@_);
211 my ($num_spaces) = @_;
212
213 for (my $i = 0; $i < $num_spaces; $i++){
214 print STDERR " ";
215 }
216}
217
218sub get_entry {
219 my $self = shift (@_);
220 my ($title, $childtype, $thistype) = @_;
221
222 # organise into classification structure
223 my %classifyinfo = ('childtype'=>$childtype,
224 'Title'=>$title,
225 'contains'=>[],);
226 $classifyinfo{'thistype'} = $thistype
227 if defined $thistype && $thistype =~ /\w/;
228
229 return \%classifyinfo;
230}
231
232sub process_hash {
233 my $self = shift (@_);
234 my ($top_hash, $top_entry) = @_;
235 my ($entry);
236
237 my $hash = {};
238 foreach my $key (sort keys %{$top_hash}) {
239 $entry = $self->get_entry($key,"VList","VList");
240 my $has_content = 0;
241 my @doc_list;
242 # generate a sorted list of doc ids
243 if (not (defined ($self->{'sort'})) && scalar(@{$top_hash->{$key}->{'docs'}})) {
244 @doc_list = @{$top_hash->{$key}->{'docs'}};
245 } elsif (defined ($self->{'sort'}) && (keys %{$top_hash->{$key}->{'docs'}})) {
246 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
247 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
248
249 }
250
251 if ($self->{'documents_last'}) {
252 # add nodes, then documents
253 # if this key has nodes, add them
254 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
255 $has_content = 1;
256 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
257 }
258
259 # if this key has documents, add them
260 if (@doc_list) {
261 $has_content = 1;
262 foreach my $d (@doc_list) {
263 push (@{$entry->{'contains'}}, {'OID'=>$d});
264 }
265 }
266
267 } else {
268 # add documents then nodes
269 # if this key has documents, add them
270 if (@doc_list) {
271 $has_content = 1;
272 foreach my $d (@doc_list) {
273 push (@{$entry->{'contains'}}, {'OID'=>$d});
274 }
275 }
276 # if this key has nodes, add them
277 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
278 $has_content = 1;
279 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
280 }
281 }
282
283 # if we have found some content, add the new entry for this key into the parent node
284 if ($has_content) {
285 push (@{$top_entry->{'contains'}}, $entry);
286 }
287
288 }
289}
290
291sub auto_get_classify_info {
292 my $self = shift (@_);
293 my ($no_thistype) = @_;
294 $no_thistype = 0 unless defined $no_thistype;
295
296 my ($classification);
297 my $top_h = $self->{'path_hash'};
298
299 if ($self->{'path_hash'}) {
300 if ($self->{'hlist_at_top'}) {
301 $classification = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
302 }
303 else {
304 $classification = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
305 }
306 }
307
308 $self->process_hash($top_h, $classification);
309
310 return $classification;
311
312}
313
314sub auto_get_classify_info
315{
316 my $self = shift (@_);
317 my ($classifyinfo) = @_;
318
319 $self->process_hash($self->{'path_hash'}, $classifyinfo);
320
321 return $classifyinfo;
322}
323
324
325sub get_classify_info {
326 my $self = shift (@_);
327
328 my ($classifyinfo);
329
330 if ($self->{'hlist_at_top'}) {
331 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
332 }
333 else {
334 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
335 }
336
337 if (defined $self->{'subjectfile'}) {
338 return $self->hfile_get_classify_info($classifyinfo);
339 }
340 else {
341 return $self->auto_get_classify_info($classifyinfo);
342 }
343}
344
345
3461;
347
348
349
Note: See TracBrowser for help on using the repository browser.