source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.5 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm -- classifier that enables a Hierarchy to beformed without
4# the need for a hierarchy file (like HFileHierarchy). Used
5# to be called AutoHierarchy. Inherits from HFileHierarchy
6# so can also do everything that does as well.
7# Created by Imene, modified by Katherine and David.
8#
9# A component of the Greenstone digital library software
10# from the New Zealand Digital Library Project at the
11# University of Waikato, New Zealand.
12#
13# Copyright (C) 1999 New Zealand Digital Library Project
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program; if not, write to the Free Software
27# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28#
29###########################################################################
30
31# An advanced Hierarchical classifier
32# to see the options, run "perl -S classinfo.pl Hierarchy"
33
34package Hierarchy;
35
36use HFileHierarchy;
37use sorttools;
38
39sub BEGIN {
40 @ISA = ('HFileHierarchy');
41}
42
43my $arguments =
44 [ { 'name' => "separator",
45 'desc' => "{Hierarchy.separator}",
46 'type' => "regexp",
47 'deft' => "[\\\\\\\/|\\\\\\\|]",
48 'reqd' => "no" },
49 { 'name' => "suppresslastlevel",
50 'desc' => "{Hierarchy.suppresslastlevel}",
51 'type' => "flag",
52 'reqd' => "no" } ];
53
54my $options = { 'name' => "Hierarchy",
55 'desc' => "{Hierarchy.desc}",
56 'abstract' => "no",
57 'inherits' => "yes",
58 'args' => $arguments };
59
60
61sub new {
62 my ($class) = shift (@_);
63 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
64 push(@$classifierslist, $class);
65
66 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
67 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
68
69 my $self = (defined $hashArgOptLists)? new HFileHierarchy($classifierslist,$inputargs,$hashArgOptLists): new HFileHierarchy($classifierslist,$inputargs);
70
71 # the hash that we use to build up the hierarchy
72 $self->{'path_hash'}= {};
73
74 return bless $self, $class;
75}
76
77
78sub auto_classify {
79 my $self = shift (@_);
80 my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_;
81
82 my $doc_OID = $doc_obj->get_OID();
83
84 #Add all the metadata values to the hash
85 my $path_hash;
86 my $current_pos;
87
88 foreach my $metavalue (@$metavalues) {
89 $path_hash = $self->{'path_hash'};
90 my @chunks = split (/$self->{'separator'}/, $metavalue);
91 if ($self->{'suppresslastlevel'}) {
92 pop(@chunks); # remove the last element from the end
93 }
94
95 foreach my $folderName (@chunks)
96 {
97 if ($folderName ne ""){ #sometimes the tokens are empty
98 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
99 $path_hash = $current_pos->{'nodes'};
100 }
101 }
102 # now add the document, with sort meta if needed
103 if ($nosort) {
104 push(@{$current_pos->{'docs'}}, $doc_OID);
105 } else {
106 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
107
108 #if (defined $sortmeta) {
109 # # can you ever get the same doc twice in one classification??
110 # $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
111 # } else {
112 # $current_pos->{'docs'}->{$doc_OID} = $metavalue;
113 # }
114 }
115 } # foreach metadata
116
117}
118
119sub classify {
120 my $self = shift (@_);
121 my ($doc_obj) = @_;
122
123 my $doc_OID = $doc_obj->get_OID();
124
125 # are we sorting the list??
126 my $nosort = 0;
127 if (!defined $self->{'sort'}) {
128 $nosort = 1;
129 }
130
131 my $metavalues = [];
132 # find all the metadata values
133 foreach $m (@{$self->{'meta_list'}}) {
134 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
135 next unless (@{$mvalues});
136 if ($self->{'firstvalueonly'}) {
137 # we only want the first metadata value
138 push (@$metavalues, $mvalues[0]);
139 last;
140 }
141 push (@$metavalues, @$mvalues);
142 last if (!$self->{'allvalues'}); # we don't want to try other elements
143 # cos we have already found some
144 }
145
146 return unless (@$metavalues);
147
148 #check for a sort element other than our metadata
149 my $sortmeta = undef;
150 if (!$nosort) {
151 if ($self->{'sort'} =~ /^filename$/i) {
152 $sortmeta = $doc_obj->get_source_filename();
153 } else {
154 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sort'});
155 if (defined $sortmeta) {
156 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sortmeta, $doc_obj);
157 }
158 }
159 $sortmeta = "" unless defined $sortmeta;
160 }
161
162 if (defined $self->{'subjectfile'}) {
163 $self->hfile_classify($doc_obj,$sortmeta,$metavalues);
164 }
165 else {
166 $self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues);
167 }
168}
169
170sub add_To_Hash {
171 my $self = shift (@_);
172 my ($myhash, $k, $nosort) = @_;
173
174 if (!defined $myhash->{$k}){
175 $myhash->{$k}={};
176 $myhash->{$k}->{'nodes'}={};
177 if ($nosort) {
178 $myhash->{$k}->{'docs'}=[];
179 } else {
180 $myhash->{$k}->{'docs'} = {};
181 }
182 }
183 return $myhash->{$k};
184}
185
186sub print_Hash{
187 my $self = shift (@_);
188 my ($myHash, $num_spaces) = @_;
189
190 foreach my $key (keys %{$myHash}){
191 print "\n";
192 $self->print_spaces($num_spaces);
193 print STDERR "$key*";
194 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
195 }
196}
197
198sub print_spaces{
199 my $self = shift (@_);
200 my ($num_spaces) = @_;
201
202 for ($i = 0; $i < $num_spaces; $i++){
203 print STDERR " ";
204 }
205}
206
207sub get_entry {
208 my $self = shift (@_);
209 my ($title, $childtype, $thistype) = @_;
210
211 # organise into classification structure
212 my %classifyinfo = ('childtype'=>$childtype,
213 'Title'=>$title,
214 'contains'=>[],);
215 $classifyinfo{'thistype'} = $thistype
216 if defined $thistype && $thistype =~ /\w/;
217
218 return \%classifyinfo;
219}
220
221sub process_hash {
222 my $self = shift (@_);
223 my ($top_hash, $top_entry) = @_;
224 my ($entry);
225
226 my $hash = {};
227 foreach my $key (sort keys %{$top_hash}) {
228 $entry = $self->get_entry($key,"VList","VList");
229 my $has_content = 0;
230 my @doc_list;
231 # generate a sorted list of doc ids
232 if (not (defined ($self->{'sort'})) && scalar(@{$top_hash->{$key}->{'docs'}})) {
233 @doc_list = @{$top_hash->{$key}->{'docs'}};
234 } elsif (defined ($self->{'sort'}) && (keys %{$top_hash->{$key}->{'docs'}})) {
235 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
236 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
237
238 }
239 # if this key has documents, add them
240 if (@doc_list) {
241 $has_content = 1;
242 foreach $d(@doc_list) {
243 push (@{$entry->{'contains'}}, {'OID'=>$d});
244 }
245 }
246 # if this key has nodes, add them
247 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
248 $has_content = 1;
249 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
250 }
251 # if we have found some content, add the new entry for this key into the parent node
252 if ($has_content) {
253 push (@{$top_entry->{'contains'}}, $entry);
254 }
255
256 }
257}
258
259sub auto_get_classify_info {
260 my $self = shift (@_);
261 my ($no_thistype) = @_;
262 $no_thistype = 0 unless defined $no_thistype;
263
264 my ($classification);
265 my $top_h = $self->{'path_hash'};
266
267 if ($self->{'path_hash'}) {
268 if ($self->{'hlist_at_top'}) {
269 $classification = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
270 }
271 else {
272 $classification = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
273 }
274 }
275
276 $self->process_hash($top_h, $classification);
277
278 return $classification;
279
280}
281
282sub auto_get_classify_info
283{
284 my $self = shift (@_);
285 my ($classifyinfo) = @_;
286
287 $self->process_hash($self->{'path_hash'}, $classifyinfo);
288
289 return $classifyinfo;
290}
291
292
293sub get_classify_info {
294 my $self = shift (@_);
295
296 my ($classifyinfo);
297
298 if ($self->{'hlist_at_top'}) {
299 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
300 }
301 else {
302 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
303 }
304
305 if (defined $self->{'subjectfile'}) {
306 return $self->hfile_get_classify_info($classifyinfo);
307 }
308 else {
309 return $self->auto_get_classify_info($classifyinfo);
310 }
311}
312
313
3141;
315
316
317
Note: See TracBrowser for help on using the repository browser.