source: gsdl/trunk/perllib/classify/Hierarchy.pm@ 18455

Last change on this file since 18455 was 18455, checked in by davidb, 15 years ago

Addition of 'edit_mode' parameter to classify(). This can be either 'add' 'delete' or 'reindex' (should think about renaming the last one to something more appropriate, e.g. update).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.5 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm -- classifier that enables a Hierarchy to beformed without
4# the need for a hierarchy file (like HFileHierarchy). Used
5# to be called AutoHierarchy. Inherits from HFileHierarchy
6# so can also do everything that does as well.
7# Created by Imene, modified by Katherine and David.
8#
9# A component of the Greenstone digital library software
10# from the New Zealand Digital Library Project at the
11# University of Waikato, New Zealand.
12#
13# Copyright (C) 1999 New Zealand Digital Library Project
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program; if not, write to the Free Software
27# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28#
29###########################################################################
30
31# An advanced Hierarchical classifier
32# to see the options, run "perl -S classinfo.pl Hierarchy"
33
34package Hierarchy;
35
36use HFileHierarchy;
37use sorttools;
38
39use strict;
40no strict 'refs'; # allow filehandles to be variables and viceversa
41
42sub BEGIN {
43 @Hierarchy::ISA = ('HFileHierarchy');
44}
45
46my $arguments =
47 [ { 'name' => "separator",
48 'desc' => "{Hierarchy.separator}",
49 'type' => "regexp",
50 'deft' => "[\\\\\\\/|\\\\\\\|]",
51 'reqd' => "no" },
52 { 'name' => "suppresslastlevel",
53 'desc' => "{Hierarchy.suppresslastlevel}",
54 'type' => "flag",
55 'reqd' => "no" },
56 { 'name' => "suppressfirstlevel",
57 'desc' => "{Hierarchy.suppressfirstlevel}",
58 'type' => "flag",
59 'reqd' => "no" }
60 ];
61
62my $options = { 'name' => "Hierarchy",
63 'desc' => "{Hierarchy.desc}",
64 'abstract' => "no",
65 'inherits' => "yes",
66 'args' => $arguments };
67
68
69sub new {
70 my ($class) = shift (@_);
71 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
72 push(@$classifierslist, $class);
73
74 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
75 push(@{$hashArgOptLists->{"OptList"}},$options);
76
77 my $self = new HFileHierarchy($classifierslist, $inputargs, $hashArgOptLists);
78
79 # the hash that we use to build up the hierarchy
80 $self->{'path_hash'}= {};
81
82 return bless $self, $class;
83}
84
85
86sub auto_classify {
87 my $self = shift (@_);
88 my ($doc_obj,$edit_mode,$nosort,$sortmeta,$metavalues) = @_;
89
90 my $doc_OID = $doc_obj->get_OID();
91
92 if ($edit_mode eq "delete") {
93 if ($nosort) {
94 $self->oid_array_delete($doc_OID,'docs');
95 }
96 else {
97 $self->oid_hash_delete($doc_OID,'docs');
98 }
99 return;
100 }
101
102 #Add all the metadata values to the hash
103 my $path_hash;
104 my $current_pos;
105
106
107 foreach my $metavalue (@$metavalues) {
108 $path_hash = $self->{'path_hash'};
109 my @chunks = split (/$self->{'separator'}/, $metavalue);
110 if ($self->{'suppresslastlevel'}) {
111 pop(@chunks); # remove the last element from the end
112 }
113 if ($self->{'suppressfirstlevel'}) {
114 shift(@chunks);
115 }
116 foreach my $folderName (@chunks)
117 {
118 # Removing leading and trailing spaces
119 $folderName =~ s/^(\s+)//;
120 $folderName =~ s/(\s+)$//;
121 if ($folderName ne ""){ #sometimes the tokens are empty
122 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
123 $path_hash = $current_pos->{'nodes'};
124 }
125 }
126 # now add the document, with sort meta if needed
127 if ($nosort) {
128 push(@{$current_pos->{'docs'}}, $doc_OID);
129 } else {
130 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
131
132 #if (defined $sortmeta) {
133 # # can you ever get the same doc twice in one classification??
134 # $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
135 # } else {
136 # $current_pos->{'docs'}->{$doc_OID} = $metavalue;
137 # }
138 }
139 } # foreach metadata
140
141}
142
143sub classify {
144 my $self = shift (@_);
145 my ($doc_obj,$edit_mode) = @_;
146
147 my $doc_OID = $doc_obj->get_OID();
148
149 # are we sorting the list??
150 my $nosort = 0;
151 if (!defined $self->{'sort'}) {
152 $nosort = 1;
153 }
154
155 my $metavalues = [];
156 # find all the metadata values
157 foreach my $m (@{$self->{'meta_list'}}) {
158 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
159 next unless (@{$mvalues});
160 if ($self->{'firstvalueonly'}) {
161 # we only want the first metadata value
162 push (@$metavalues, $mvalues->[0]);
163 last;
164 }
165 push (@$metavalues, @$mvalues);
166 last if (!$self->{'allvalues'}); # we don't want to try other elements
167 # cos we have already found some
168 }
169
170 return unless (@$metavalues);
171
172 #check for a sort element other than our metadata
173 my $sortmeta = undef;
174 if (!$nosort) {
175 if ($self->{'sort'} =~ /^filename$/i) {
176 $sortmeta = $doc_obj->get_source_filename();
177 } else {
178 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sort'});
179 if (defined $sortmeta && !$self->{'no_metadata_formatting'}) {
180 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sortmeta, $doc_obj);
181 }
182 }
183 $sortmeta = "" unless defined $sortmeta;
184 }
185
186 if (defined $self->{'subjectfile'}) {
187 $self->hfile_classify($doc_obj,$edit_mode,$sortmeta,$metavalues);
188 }
189 else {
190 $self->auto_classify($doc_obj,$edit_mode,$nosort,$sortmeta,$metavalues);
191 }
192}
193
194sub add_To_Hash {
195 my $self = shift (@_);
196 my ($myhash, $k, $nosort) = @_;
197
198 if (!defined $myhash->{$k}){
199 $myhash->{$k}={};
200 $myhash->{$k}->{'nodes'}={};
201 if ($nosort) {
202 $myhash->{$k}->{'docs'}=[];
203 } else {
204 $myhash->{$k}->{'docs'} = {};
205 }
206 }
207 return $myhash->{$k};
208}
209
210sub print_Hash{
211 my $self = shift (@_);
212 my ($myHash, $num_spaces) = @_;
213
214 foreach my $key (keys %{$myHash}){
215 print "\n";
216 $self->print_spaces($num_spaces);
217 print STDERR "$key*";
218 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
219 }
220}
221
222sub print_spaces{
223 my $self = shift (@_);
224 my ($num_spaces) = @_;
225
226 for (my $i = 0; $i < $num_spaces; $i++){
227 print STDERR " ";
228 }
229}
230
231sub get_entry {
232 my $self = shift (@_);
233 my ($title, $childtype, $thistype) = @_;
234
235 # organise into classification structure
236 my %classifyinfo = ('childtype'=>$childtype,
237 'Title'=>$title,
238 'contains'=>[],);
239 $classifyinfo{'thistype'} = $thistype
240 if defined $thistype && $thistype =~ /\w/;
241
242 return \%classifyinfo;
243}
244
245sub process_hash {
246 my $self = shift (@_);
247 my ($top_hash, $top_entry) = @_;
248 my ($entry);
249
250 my $hash = {};
251 foreach my $key (sort keys %{$top_hash}) {
252 $entry = $self->get_entry($key,"VList","VList");
253 my $has_content = 0;
254 my @doc_list;
255 # generate a sorted list of doc ids
256 if (not (defined ($self->{'sort'})) && scalar(@{$top_hash->{$key}->{'docs'}})) {
257 @doc_list = @{$top_hash->{$key}->{'docs'}};
258 } elsif (defined ($self->{'sort'}) && (keys %{$top_hash->{$key}->{'docs'}})) {
259 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
260 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
261
262 }
263
264 if ($self->{'documents_last'}) {
265 # add nodes, then documents
266 # if this key has nodes, add them
267 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
268 $has_content = 1;
269 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
270 }
271
272 # if this key has documents, add them
273 if (@doc_list) {
274 $has_content = 1;
275 foreach my $d (@doc_list) {
276 push (@{$entry->{'contains'}}, {'OID'=>$d});
277 }
278 }
279
280 } else {
281 # add documents then nodes
282 # if this key has documents, add them
283 if (@doc_list) {
284 $has_content = 1;
285 foreach my $d (@doc_list) {
286 push (@{$entry->{'contains'}}, {'OID'=>$d});
287 }
288 }
289 # if this key has nodes, add them
290 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
291 $has_content = 1;
292 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
293 }
294 }
295
296 # if we have found some content, add the new entry for this key into the parent node
297 if ($has_content) {
298 push (@{$top_entry->{'contains'}}, $entry);
299 }
300
301 }
302}
303
304sub auto_get_classify_info {
305 my $self = shift (@_);
306 my ($no_thistype) = @_;
307 $no_thistype = 0 unless defined $no_thistype;
308
309 my ($classification);
310 my $top_h = $self->{'path_hash'};
311
312 if ($self->{'path_hash'}) {
313 if ($self->{'hlist_at_top'}) {
314 $classification = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
315 }
316 else {
317 $classification = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
318 }
319 }
320
321 $self->process_hash($top_h, $classification);
322
323 return $classification;
324
325}
326
327sub auto_get_classify_info
328{
329 my $self = shift (@_);
330 my ($classifyinfo) = @_;
331
332 $self->process_hash($self->{'path_hash'}, $classifyinfo);
333
334 return $classifyinfo;
335}
336
337
338sub get_classify_info {
339 my $self = shift (@_);
340
341 my ($classifyinfo);
342
343 if ($self->{'hlist_at_top'}) {
344 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
345 }
346 else {
347 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
348 }
349
350 if (defined $self->{'subjectfile'}) {
351 return $self->hfile_get_classify_info($classifyinfo);
352 }
353 else {
354 return $self->auto_get_classify_info($classifyinfo);
355 }
356}
357
358
3591;
360
361
362
Note: See TracBrowser for help on using the repository browser.