source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 10253

Last change on this file since 10253 was 10253, checked in by kjdon, 19 years ago

added 'use strict' to all classifiers, and made modifications (mostly adding 'my') to make them compile

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm -- classifier that enables a Hierarchy to beformed without
4# the need for a hierarchy file (like HFileHierarchy). Used
5# to be called AutoHierarchy. Inherits from HFileHierarchy
6# so can also do everything that does as well.
7# Created by Imene, modified by Katherine and David.
8#
9# A component of the Greenstone digital library software
10# from the New Zealand Digital Library Project at the
11# University of Waikato, New Zealand.
12#
13# Copyright (C) 1999 New Zealand Digital Library Project
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program; if not, write to the Free Software
27# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28#
29###########################################################################
30
31# An advanced Hierarchical classifier
32# to see the options, run "perl -S classinfo.pl Hierarchy"
33
34package Hierarchy;
35
36use HFileHierarchy;
37use sorttools;
38
39use strict;
40no strict 'refs'; # allow filehandles to be variables and viceversa
41
42sub BEGIN {
43 @Hierarchy::ISA = ('HFileHierarchy');
44}
45
46my $arguments =
47 [ { 'name' => "separator",
48 'desc' => "{Hierarchy.separator}",
49 'type' => "regexp",
50 'deft' => "[\\\\\\\/|\\\\\\\|]",
51 'reqd' => "no" },
52 { 'name' => "suppresslastlevel",
53 'desc' => "{Hierarchy.suppresslastlevel}",
54 'type' => "flag",
55 'reqd' => "no" } ];
56
57my $options = { 'name' => "Hierarchy",
58 'desc' => "{Hierarchy.desc}",
59 'abstract' => "no",
60 'inherits' => "yes",
61 'args' => $arguments };
62
63
64sub new {
65 my ($class) = shift (@_);
66 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
67 push(@$classifierslist, $class);
68
69 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
70 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
71
72 my $self = (defined $hashArgOptLists)? new HFileHierarchy($classifierslist,$inputargs,$hashArgOptLists): new HFileHierarchy($classifierslist,$inputargs);
73
74 # the hash that we use to build up the hierarchy
75 $self->{'path_hash'}= {};
76
77 return bless $self, $class;
78}
79
80
81sub auto_classify {
82 my $self = shift (@_);
83 my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_;
84
85 my $doc_OID = $doc_obj->get_OID();
86
87 #Add all the metadata values to the hash
88 my $path_hash;
89 my $current_pos;
90
91
92 foreach my $metavalue (@$metavalues) {
93 $path_hash = $self->{'path_hash'};
94 my @chunks = split (/$self->{'separator'}/, $metavalue);
95 if ($self->{'suppresslastlevel'}) {
96 pop(@chunks); # remove the last element from the end
97 }
98 foreach my $folderName (@chunks)
99 {
100 if ($folderName ne ""){ #sometimes the tokens are empty
101 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
102 $path_hash = $current_pos->{'nodes'};
103 }
104 }
105 # now add the document, with sort meta if needed
106 if ($nosort) {
107 push(@{$current_pos->{'docs'}}, $doc_OID);
108 } else {
109 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
110
111 #if (defined $sortmeta) {
112 # # can you ever get the same doc twice in one classification??
113 # $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
114 # } else {
115 # $current_pos->{'docs'}->{$doc_OID} = $metavalue;
116 # }
117 }
118 } # foreach metadata
119
120}
121
122sub classify {
123 my $self = shift (@_);
124 my ($doc_obj) = @_;
125
126 my $doc_OID = $doc_obj->get_OID();
127
128 # are we sorting the list??
129 my $nosort = 0;
130 if (!defined $self->{'sort'}) {
131 $nosort = 1;
132 }
133
134 my $metavalues = [];
135 # find all the metadata values
136 foreach my $m (@{$self->{'meta_list'}}) {
137 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
138 next unless (@{$mvalues});
139 if ($self->{'firstvalueonly'}) {
140 # we only want the first metadata value
141 push (@$metavalues, $mvalues->[0]);
142 last;
143 }
144 push (@$metavalues, @$mvalues);
145 last if (!$self->{'allvalues'}); # we don't want to try other elements
146 # cos we have already found some
147 }
148
149 return unless (@$metavalues);
150
151 #check for a sort element other than our metadata
152 my $sortmeta = undef;
153 if (!$nosort) {
154 if ($self->{'sort'} =~ /^filename$/i) {
155 $sortmeta = $doc_obj->get_source_filename();
156 } else {
157 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sort'});
158 if (defined $sortmeta) {
159 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sortmeta, $doc_obj);
160 }
161 }
162 $sortmeta = "" unless defined $sortmeta;
163 }
164
165 if (defined $self->{'subjectfile'}) {
166 $self->hfile_classify($doc_obj,$sortmeta,$metavalues);
167 }
168 else {
169 $self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues);
170 }
171}
172
173sub add_To_Hash {
174 my $self = shift (@_);
175 my ($myhash, $k, $nosort) = @_;
176
177 if (!defined $myhash->{$k}){
178 $myhash->{$k}={};
179 $myhash->{$k}->{'nodes'}={};
180 if ($nosort) {
181 $myhash->{$k}->{'docs'}=[];
182 } else {
183 $myhash->{$k}->{'docs'} = {};
184 }
185 }
186 return $myhash->{$k};
187}
188
189sub print_Hash{
190 my $self = shift (@_);
191 my ($myHash, $num_spaces) = @_;
192
193 foreach my $key (keys %{$myHash}){
194 print "\n";
195 $self->print_spaces($num_spaces);
196 print STDERR "$key*";
197 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
198 }
199}
200
201sub print_spaces{
202 my $self = shift (@_);
203 my ($num_spaces) = @_;
204
205 for (my $i = 0; $i < $num_spaces; $i++){
206 print STDERR " ";
207 }
208}
209
210sub get_entry {
211 my $self = shift (@_);
212 my ($title, $childtype, $thistype) = @_;
213
214 # organise into classification structure
215 my %classifyinfo = ('childtype'=>$childtype,
216 'Title'=>$title,
217 'contains'=>[],);
218 $classifyinfo{'thistype'} = $thistype
219 if defined $thistype && $thistype =~ /\w/;
220
221 return \%classifyinfo;
222}
223
224sub process_hash {
225 my $self = shift (@_);
226 my ($top_hash, $top_entry) = @_;
227 my ($entry);
228
229 my $hash = {};
230 foreach my $key (sort keys %{$top_hash}) {
231 $entry = $self->get_entry($key,"VList","VList");
232 my $has_content = 0;
233 my @doc_list;
234 # generate a sorted list of doc ids
235 if (not (defined ($self->{'sort'})) && scalar(@{$top_hash->{$key}->{'docs'}})) {
236 @doc_list = @{$top_hash->{$key}->{'docs'}};
237 } elsif (defined ($self->{'sort'}) && (keys %{$top_hash->{$key}->{'docs'}})) {
238 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
239 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
240
241 }
242 # if this key has documents, add them
243 if (@doc_list) {
244 $has_content = 1;
245 foreach my $d (@doc_list) {
246 push (@{$entry->{'contains'}}, {'OID'=>$d});
247 }
248 }
249 # if this key has nodes, add them
250 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
251 $has_content = 1;
252 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
253 }
254 # if we have found some content, add the new entry for this key into the parent node
255 if ($has_content) {
256 push (@{$top_entry->{'contains'}}, $entry);
257 }
258
259 }
260}
261
262sub auto_get_classify_info {
263 my $self = shift (@_);
264 my ($no_thistype) = @_;
265 $no_thistype = 0 unless defined $no_thistype;
266
267 my ($classification);
268 my $top_h = $self->{'path_hash'};
269
270 if ($self->{'path_hash'}) {
271 if ($self->{'hlist_at_top'}) {
272 $classification = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
273 }
274 else {
275 $classification = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
276 }
277 }
278
279 $self->process_hash($top_h, $classification);
280
281 return $classification;
282
283}
284
285sub auto_get_classify_info
286{
287 my $self = shift (@_);
288 my ($classifyinfo) = @_;
289
290 $self->process_hash($self->{'path_hash'}, $classifyinfo);
291
292 return $classifyinfo;
293}
294
295
296sub get_classify_info {
297 my $self = shift (@_);
298
299 my ($classifyinfo);
300
301 if ($self->{'hlist_at_top'}) {
302 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
303 }
304 else {
305 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
306 }
307
308 if (defined $self->{'subjectfile'}) {
309 return $self->hfile_get_classify_info($classifyinfo);
310 }
311 else {
312 return $self->auto_get_classify_info($classifyinfo);
313 }
314}
315
316
3171;
318
319
320
Note: See TracBrowser for help on using the repository browser.