source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 10621

Last change on this file since 10621 was 10621, checked in by kjdon, 19 years ago

added a suppressfirstlevel - use with e.g. gsdlsourcefilename to remove the import directory

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm -- classifier that enables a Hierarchy to beformed without
4# the need for a hierarchy file (like HFileHierarchy). Used
5# to be called AutoHierarchy. Inherits from HFileHierarchy
6# so can also do everything that does as well.
7# Created by Imene, modified by Katherine and David.
8#
9# A component of the Greenstone digital library software
10# from the New Zealand Digital Library Project at the
11# University of Waikato, New Zealand.
12#
13# Copyright (C) 1999 New Zealand Digital Library Project
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program; if not, write to the Free Software
27# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28#
29###########################################################################
30
31# An advanced Hierarchical classifier
32# to see the options, run "perl -S classinfo.pl Hierarchy"
33
34package Hierarchy;
35
36use HFileHierarchy;
37use sorttools;
38
39use strict;
40no strict 'refs'; # allow filehandles to be variables and viceversa
41
42sub BEGIN {
43 @Hierarchy::ISA = ('HFileHierarchy');
44}
45
46my $arguments =
47 [ { 'name' => "separator",
48 'desc' => "{Hierarchy.separator}",
49 'type' => "regexp",
50 'deft' => "[\\\\\\\/|\\\\\\\|]",
51 'reqd' => "no" },
52 { 'name' => "suppresslastlevel",
53 'desc' => "{Hierarchy.suppresslastlevel}",
54 'type' => "flag",
55 'reqd' => "no" },
56 { 'name' => "suppressfirstlevel",
57 'desc' => "{Hierarchy.suppressfirstlevel}",
58 'type' => "flag",
59 'reqd' => "no" }
60 ];
61
62my $options = { 'name' => "Hierarchy",
63 'desc' => "{Hierarchy.desc}",
64 'abstract' => "no",
65 'inherits' => "yes",
66 'args' => $arguments };
67
68
69sub new {
70 my ($class) = shift (@_);
71 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
72 push(@$classifierslist, $class);
73
74 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
75 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
76
77 my $self = (defined $hashArgOptLists)? new HFileHierarchy($classifierslist,$inputargs,$hashArgOptLists): new HFileHierarchy($classifierslist,$inputargs);
78
79 # the hash that we use to build up the hierarchy
80 $self->{'path_hash'}= {};
81
82 return bless $self, $class;
83}
84
85
86sub auto_classify {
87 my $self = shift (@_);
88 my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_;
89
90 my $doc_OID = $doc_obj->get_OID();
91
92 #Add all the metadata values to the hash
93 my $path_hash;
94 my $current_pos;
95
96
97 foreach my $metavalue (@$metavalues) {
98 $path_hash = $self->{'path_hash'};
99 my @chunks = split (/$self->{'separator'}/, $metavalue);
100 if ($self->{'suppresslastlevel'}) {
101 pop(@chunks); # remove the last element from the end
102 }
103 if ($self->{'suppressfirstlevel'}) {
104 shift(@chunks);
105 }
106 foreach my $folderName (@chunks)
107 {
108 if ($folderName ne ""){ #sometimes the tokens are empty
109 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
110 $path_hash = $current_pos->{'nodes'};
111 }
112 }
113 # now add the document, with sort meta if needed
114 if ($nosort) {
115 push(@{$current_pos->{'docs'}}, $doc_OID);
116 } else {
117 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
118
119 #if (defined $sortmeta) {
120 # # can you ever get the same doc twice in one classification??
121 # $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
122 # } else {
123 # $current_pos->{'docs'}->{$doc_OID} = $metavalue;
124 # }
125 }
126 } # foreach metadata
127
128}
129
130sub classify {
131 my $self = shift (@_);
132 my ($doc_obj) = @_;
133
134 my $doc_OID = $doc_obj->get_OID();
135
136 # are we sorting the list??
137 my $nosort = 0;
138 if (!defined $self->{'sort'}) {
139 $nosort = 1;
140 }
141
142 my $metavalues = [];
143 # find all the metadata values
144 foreach my $m (@{$self->{'meta_list'}}) {
145 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
146 next unless (@{$mvalues});
147 if ($self->{'firstvalueonly'}) {
148 # we only want the first metadata value
149 push (@$metavalues, $mvalues->[0]);
150 last;
151 }
152 push (@$metavalues, @$mvalues);
153 last if (!$self->{'allvalues'}); # we don't want to try other elements
154 # cos we have already found some
155 }
156
157 return unless (@$metavalues);
158
159 #check for a sort element other than our metadata
160 my $sortmeta = undef;
161 if (!$nosort) {
162 if ($self->{'sort'} =~ /^filename$/i) {
163 $sortmeta = $doc_obj->get_source_filename();
164 } else {
165 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sort'});
166 if (defined $sortmeta) {
167 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sortmeta, $doc_obj);
168 }
169 }
170 $sortmeta = "" unless defined $sortmeta;
171 }
172
173 if (defined $self->{'subjectfile'}) {
174 $self->hfile_classify($doc_obj,$sortmeta,$metavalues);
175 }
176 else {
177 $self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues);
178 }
179}
180
181sub add_To_Hash {
182 my $self = shift (@_);
183 my ($myhash, $k, $nosort) = @_;
184
185 if (!defined $myhash->{$k}){
186 $myhash->{$k}={};
187 $myhash->{$k}->{'nodes'}={};
188 if ($nosort) {
189 $myhash->{$k}->{'docs'}=[];
190 } else {
191 $myhash->{$k}->{'docs'} = {};
192 }
193 }
194 return $myhash->{$k};
195}
196
197sub print_Hash{
198 my $self = shift (@_);
199 my ($myHash, $num_spaces) = @_;
200
201 foreach my $key (keys %{$myHash}){
202 print "\n";
203 $self->print_spaces($num_spaces);
204 print STDERR "$key*";
205 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
206 }
207}
208
209sub print_spaces{
210 my $self = shift (@_);
211 my ($num_spaces) = @_;
212
213 for (my $i = 0; $i < $num_spaces; $i++){
214 print STDERR " ";
215 }
216}
217
218sub get_entry {
219 my $self = shift (@_);
220 my ($title, $childtype, $thistype) = @_;
221
222 # organise into classification structure
223 my %classifyinfo = ('childtype'=>$childtype,
224 'Title'=>$title,
225 'contains'=>[],);
226 $classifyinfo{'thistype'} = $thistype
227 if defined $thistype && $thistype =~ /\w/;
228
229 return \%classifyinfo;
230}
231
232sub process_hash {
233 my $self = shift (@_);
234 my ($top_hash, $top_entry) = @_;
235 my ($entry);
236
237 my $hash = {};
238 foreach my $key (sort keys %{$top_hash}) {
239 $entry = $self->get_entry($key,"VList","VList");
240 my $has_content = 0;
241 my @doc_list;
242 # generate a sorted list of doc ids
243 if (not (defined ($self->{'sort'})) && scalar(@{$top_hash->{$key}->{'docs'}})) {
244 @doc_list = @{$top_hash->{$key}->{'docs'}};
245 } elsif (defined ($self->{'sort'}) && (keys %{$top_hash->{$key}->{'docs'}})) {
246 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
247 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
248
249 }
250 # if this key has documents, add them
251 if (@doc_list) {
252 $has_content = 1;
253 foreach my $d (@doc_list) {
254 push (@{$entry->{'contains'}}, {'OID'=>$d});
255 }
256 }
257 # if this key has nodes, add them
258 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
259 $has_content = 1;
260 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
261 }
262 # if we have found some content, add the new entry for this key into the parent node
263 if ($has_content) {
264 push (@{$top_entry->{'contains'}}, $entry);
265 }
266
267 }
268}
269
270sub auto_get_classify_info {
271 my $self = shift (@_);
272 my ($no_thistype) = @_;
273 $no_thistype = 0 unless defined $no_thistype;
274
275 my ($classification);
276 my $top_h = $self->{'path_hash'};
277
278 if ($self->{'path_hash'}) {
279 if ($self->{'hlist_at_top'}) {
280 $classification = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
281 }
282 else {
283 $classification = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
284 }
285 }
286
287 $self->process_hash($top_h, $classification);
288
289 return $classification;
290
291}
292
293sub auto_get_classify_info
294{
295 my $self = shift (@_);
296 my ($classifyinfo) = @_;
297
298 $self->process_hash($self->{'path_hash'}, $classifyinfo);
299
300 return $classifyinfo;
301}
302
303
304sub get_classify_info {
305 my $self = shift (@_);
306
307 my ($classifyinfo);
308
309 if ($self->{'hlist_at_top'}) {
310 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
311 }
312 else {
313 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
314 }
315
316 if (defined $self->{'subjectfile'}) {
317 return $self->hfile_get_classify_info($classifyinfo);
318 }
319 else {
320 return $self->auto_get_classify_info($classifyinfo);
321 }
322}
323
324
3251;
326
327
328
Note: See TracBrowser for help on using the repository browser.