Context Navigation

source: gsdl/tags/greenstone-3_01-distribution/gsdl/perllib/classify/Hierarchy.pm@ 18510

Last change on this file since 18510 was 10663, checked in by kjdon, 19 years ago
added documents_last - display document nodes after classifier nodes in the lists
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 9.3 KB

Line
1	###########################################################################
2	#
3	# Hierarchy.pm -- classifier that enables a Hierarchy to beformed without
4	# the need for a hierarchy file (like HFileHierarchy). Used
5	# to be called AutoHierarchy. Inherits from HFileHierarchy
6	# so can also do everything that does as well.
7	# Created by Imene, modified by Katherine and David.
8	#
9	# A component of the Greenstone digital library software
10	# from the New Zealand Digital Library Project at the
11	# University of Waikato, New Zealand.
12	#
13	# Copyright (C) 1999 New Zealand Digital Library Project
14	#
15	# This program is free software; you can redistribute it and/or modify
16	# it under the terms of the GNU General Public License as published by
17	# the Free Software Foundation; either version 2 of the License, or
18	# (at your option) any later version.
19	#
20	# This program is distributed in the hope that it will be useful,
21	# but WITHOUT ANY WARRANTY; without even the implied warranty of
22	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23	# GNU General Public License for more details.
24	#
25	# You should have received a copy of the GNU General Public License
26	# along with this program; if not, write to the Free Software
27	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28	#
29	###########################################################################
30
31	# An advanced Hierarchical classifier
32	# to see the options, run "perl -S classinfo.pl Hierarchy"
33
34	package Hierarchy;
35
36	use HFileHierarchy;
37	use sorttools;
38
39	use strict;
40	no strict 'refs'; # allow filehandles to be variables and viceversa
41
42	sub BEGIN {
43	@Hierarchy::ISA = ('HFileHierarchy');
44	}
45
46	my $arguments =
47	[ { 'name' => "separator",
48	'desc' => "{Hierarchy.separator}",
49	'type' => "regexp",
50	'deft' => "[\\\\\\\/\|\\\\\\\\|]",
51	'reqd' => "no" },
52	{ 'name' => "suppresslastlevel",
53	'desc' => "{Hierarchy.suppresslastlevel}",
54	'type' => "flag",
55	'reqd' => "no" },
56	{ 'name' => "suppressfirstlevel",
57	'desc' => "{Hierarchy.suppressfirstlevel}",
58	'type' => "flag",
59	'reqd' => "no" }
60	];
61
62	my $options = { 'name' => "Hierarchy",
63	'desc' => "{Hierarchy.desc}",
64	'abstract' => "no",
65	'inherits' => "yes",
66	'args' => $arguments };
67
68
69	sub new {
70	my ($class) = shift (@_);
71	my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
72	push(@$classifierslist, $class);
73
74	if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
75	if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
76
77	my $self = (defined $hashArgOptLists)? new HFileHierarchy($classifierslist,$inputargs,$hashArgOptLists): new HFileHierarchy($classifierslist,$inputargs);
78
79	# the hash that we use to build up the hierarchy
80	$self->{'path_hash'}= {};
81
82	return bless $self, $class;
83	}
84
85
86	sub auto_classify {
87	my $self = shift (@_);
88	my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_;
89
90	my $doc_OID = $doc_obj->get_OID();
91
92	#Add all the metadata values to the hash
93	my $path_hash;
94	my $current_pos;
95
96
97	foreach my $metavalue (@$metavalues) {
98	$path_hash = $self->{'path_hash'};
99	my @chunks = split (/$self->{'separator'}/, $metavalue);
100	if ($self->{'suppresslastlevel'}) {
101	pop(@chunks); # remove the last element from the end
102	}
103	if ($self->{'suppressfirstlevel'}) {
104	shift(@chunks);
105	}
106	foreach my $folderName (@chunks)
107	{
108	if ($folderName ne ""){ #sometimes the tokens are empty
109	$current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
110	$path_hash = $current_pos->{'nodes'};
111	}
112	}
113	# now add the document, with sort meta if needed
114	if ($nosort) {
115	push(@{$current_pos->{'docs'}}, $doc_OID);
116	} else {
117	$current_pos->{'docs'}->{$doc_OID} = $sortmeta;
118
119	#if (defined $sortmeta) {
120	# # can you ever get the same doc twice in one classification??
121	# $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
122	# } else {
123	# $current_pos->{'docs'}->{$doc_OID} = $metavalue;
124	# }
125	}
126	} # foreach metadata
127
128	}
129
130	sub classify {
131	my $self = shift (@_);
132	my ($doc_obj) = @_;
133
134	my $doc_OID = $doc_obj->get_OID();
135
136	# are we sorting the list??
137	my $nosort = 0;
138	if (!defined $self->{'sort'}) {
139	$nosort = 1;
140	}
141
142	my $metavalues = [];
143	# find all the metadata values
144	foreach my $m (@{$self->{'meta_list'}}) {
145	my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
146	next unless (@{$mvalues});
147	if ($self->{'firstvalueonly'}) {
148	# we only want the first metadata value
149	push (@$metavalues, $mvalues->[0]);
150	last;
151	}
152	push (@$metavalues, @$mvalues);
153	last if (!$self->{'allvalues'}); # we don't want to try other elements
154	# cos we have already found some
155	}
156
157	return unless (@$metavalues);
158
159	#check for a sort element other than our metadata
160	my $sortmeta = undef;
161	if (!$nosort) {
162	if ($self->{'sort'} =~ /^filename$/i) {
163	$sortmeta = $doc_obj->get_source_filename();
164	} else {
165	$sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sort'});
166	if (defined $sortmeta && !$self->{'no_metadata_formatting'}) {
167	$sortmeta = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sortmeta, $doc_obj);
168	}
169	}
170	$sortmeta = "" unless defined $sortmeta;
171	}
172
173	if (defined $self->{'subjectfile'}) {
174	$self->hfile_classify($doc_obj,$sortmeta,$metavalues);
175	}
176	else {
177	$self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues);
178	}
179	}
180
181	sub add_To_Hash {
182	my $self = shift (@_);
183	my ($myhash, $k, $nosort) = @_;
184
185	if (!defined $myhash->{$k}){
186	$myhash->{$k}={};
187	$myhash->{$k}->{'nodes'}={};
188	if ($nosort) {
189	$myhash->{$k}->{'docs'}=[];
190	} else {
191	$myhash->{$k}->{'docs'} = {};
192	}
193	}
194	return $myhash->{$k};
195	}
196
197	sub print_Hash{
198	my $self = shift (@_);
199	my ($myHash, $num_spaces) = @_;
200
201	foreach my $key (keys %{$myHash}){
202	print "\n";
203	$self->print_spaces($num_spaces);
204	print STDERR "$key*";
205	$self->print_Hash($myHash->{$key}, $num_spaces + 2);
206	}
207	}
208
209	sub print_spaces{
210	my $self = shift (@_);
211	my ($num_spaces) = @_;
212
213	for (my $i = 0; $i < $num_spaces; $i++){
214	print STDERR " ";
215	}
216	}
217
218	sub get_entry {
219	my $self = shift (@_);
220	my ($title, $childtype, $thistype) = @_;
221
222	# organise into classification structure
223	my %classifyinfo = ('childtype'=>$childtype,
224	'Title'=>$title,
225	'contains'=>[],);
226	$classifyinfo{'thistype'} = $thistype
227	if defined $thistype && $thistype =~ /\w/;
228
229	return \%classifyinfo;
230	}
231
232	sub process_hash {
233	my $self = shift (@_);
234	my ($top_hash, $top_entry) = @_;
235	my ($entry);
236
237	my $hash = {};
238	foreach my $key (sort keys %{$top_hash}) {
239	$entry = $self->get_entry($key,"VList","VList");
240	my $has_content = 0;
241	my @doc_list;
242	# generate a sorted list of doc ids
243	if (not (defined ($self->{'sort'})) && scalar(@{$top_hash->{$key}->{'docs'}})) {
244	@doc_list = @{$top_hash->{$key}->{'docs'}};
245	} elsif (defined ($self->{'sort'}) && (keys %{$top_hash->{$key}->{'docs'}})) {
246	@doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
247	cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
248
249	}
250
251	if ($self->{'documents_last'}) {
252	# add nodes, then documents
253	# if this key has nodes, add them
254	if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
255	$has_content = 1;
256	$self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
257	}
258
259	# if this key has documents, add them
260	if (@doc_list) {
261	$has_content = 1;
262	foreach my $d (@doc_list) {
263	push (@{$entry->{'contains'}}, {'OID'=>$d});
264	}
265	}
266
267	} else {
268	# add documents then nodes
269	# if this key has documents, add them
270	if (@doc_list) {
271	$has_content = 1;
272	foreach my $d (@doc_list) {
273	push (@{$entry->{'contains'}}, {'OID'=>$d});
274	}
275	}
276	# if this key has nodes, add them
277	if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
278	$has_content = 1;
279	$self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
280	}
281	}
282
283	# if we have found some content, add the new entry for this key into the parent node
284	if ($has_content) {
285	push (@{$top_entry->{'contains'}}, $entry);
286	}
287
288	}
289	}
290
291	sub auto_get_classify_info {
292	my $self = shift (@_);
293	my ($no_thistype) = @_;
294	$no_thistype = 0 unless defined $no_thistype;
295
296	my ($classification);
297	my $top_h = $self->{'path_hash'};
298
299	if ($self->{'path_hash'}) {
300	if ($self->{'hlist_at_top'}) {
301	$classification = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
302	}
303	else {
304	$classification = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
305	}
306	}
307
308	$self->process_hash($top_h, $classification);
309
310	return $classification;
311
312	}
313
314	sub auto_get_classify_info
315	{
316	my $self = shift (@_);
317	my ($classifyinfo) = @_;
318
319	$self->process_hash($self->{'path_hash'}, $classifyinfo);
320
321	return $classifyinfo;
322	}
323
324
325	sub get_classify_info {
326	my $self = shift (@_);
327
328	my ($classifyinfo);
329
330	if ($self->{'hlist_at_top'}) {
331	$classifyinfo = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
332	}
333	else {
334	$classifyinfo = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
335	}
336
337	if (defined $self->{'subjectfile'}) {
338	return $self->hfile_get_classify_info($classifyinfo);
339	}
340	else {
341	return $self->auto_get_classify_info($classifyinfo);
342	}
343	}
344
345
346	1;
347
348
349

Note: See TracBrowser for help on using the repository browser.

Download in other formats: