source: gs2-extensions/parallel-building/trunk/src/perllib/classify/Hierarchy.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

  • Property svn:executable set to *
File size: 9.2 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm -- classifier that enables a Hierarchy to beformed without
4# the need for a hierarchy file (like HFileHierarchy). Used
5# to be called AutoHierarchy. Inherits from HFileHierarchy
6# so can also do everything that does as well.
7# Created by Imene, modified by Katherine and David.
8#
9# A component of the Greenstone digital library software
10# from the New Zealand Digital Library Project at the
11# University of Waikato, New Zealand.
12#
13# Copyright (C) 1999 New Zealand Digital Library Project
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program; if not, write to the Free Software
27# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28#
29###########################################################################
30
31# An advanced Hierarchical classifier
32# to see the options, run "perl -S classinfo.pl Hierarchy"
33
34package Hierarchy;
35
36use HFileHierarchy;
37use sorttools;
38
39use strict;
40no strict 'refs'; # allow filehandles to be variables and viceversa
41
42sub BEGIN {
43 @Hierarchy::ISA = ('HFileHierarchy');
44}
45
46my $arguments =
47 [ { 'name' => "separator",
48 'desc' => "{Hierarchy.separator}",
49 'type' => "regexp",
50 'deft' => "[\\\\\\\/|\\\\\\\|]",
51 'reqd' => "no" },
52 { 'name' => "suppresslastlevel",
53 'desc' => "{Hierarchy.suppresslastlevel}",
54 'type' => "flag",
55 'reqd' => "no" },
56 { 'name' => "suppressfirstlevel",
57 'desc' => "{Hierarchy.suppressfirstlevel}",
58 'type' => "flag",
59 'reqd' => "no" }
60 ];
61
62my $options = { 'name' => "Hierarchy",
63 'desc' => "{Hierarchy.desc}",
64 'abstract' => "no",
65 'inherits' => "yes",
66 'args' => $arguments };
67
68
69sub new {
70 my ($class) = shift (@_);
71 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
72 push(@$classifierslist, $class);
73
74 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
75 push(@{$hashArgOptLists->{"OptList"}},$options);
76
77 my $self = new HFileHierarchy($classifierslist, $inputargs, $hashArgOptLists);
78
79 # the hash that we use to build up the hierarchy
80 $self->{'path_hash'}= {};
81
82 return bless $self, $class;
83}
84
85
86sub auto_classify {
87 my $self = shift (@_);
88 my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_;
89
90 my $doc_OID = $doc_obj->get_OID();
91
92 #Add all the metadata values to the hash
93 my $path_hash;
94 my $current_pos;
95
96
97 foreach my $metavalue (@$metavalues) {
98 $path_hash = $self->{'path_hash'};
99 my @chunks = split (/$self->{'separator'}/, $metavalue);
100 if ($self->{'suppresslastlevel'}) {
101 pop(@chunks); # remove the last element from the end
102 }
103 if ($self->{'suppressfirstlevel'}) {
104 shift(@chunks);
105 }
106 foreach my $folderName (@chunks)
107 {
108 # Removing leading and trailing spaces
109 $folderName =~ s/^(\s+)//;
110 $folderName =~ s/(\s+)$//;
111 if ($folderName ne ""){ #sometimes the tokens are empty
112 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort);
113 $path_hash = $current_pos->{'nodes'};
114 }
115 }
116 # now add the document, with sort meta if needed
117 if ($nosort) {
118 push(@{$current_pos->{'docs'}}, $doc_OID);
119 } else {
120 $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
121
122 #if (defined $sortmeta) {
123 # # can you ever get the same doc twice in one classification??
124 # $current_pos->{'docs'}->{$doc_OID} = $sortmeta;
125 # } else {
126 # $current_pos->{'docs'}->{$doc_OID} = $metavalue;
127 # }
128 }
129 } # foreach metadata
130
131}
132
133sub classify {
134 my $self = shift (@_);
135 my ($doc_obj) = @_;
136
137 my $doc_OID = $doc_obj->get_OID();
138
139 # are we sorting the list??
140 my $nosort = 0;
141 if (!defined $self->{'sort'}) {
142 $nosort = 1;
143 }
144
145 my $metavalues = [];
146 # find all the metadata values
147 foreach my $m (@{$self->{'meta_list'}}) {
148 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m);
149 next unless (@{$mvalues});
150 if ($self->{'firstvalueonly'}) {
151 # we only want the first metadata value
152 push (@$metavalues, $mvalues->[0]);
153 last;
154 }
155 push (@$metavalues, @$mvalues);
156 last if (!$self->{'allvalues'}); # we don't want to try other elements
157 # cos we have already found some
158 }
159
160 return unless (@$metavalues);
161
162 #check for a sort element other than our metadata
163 my $sortmeta = undef;
164 if (!$nosort) {
165 if ($self->{'sort'} =~ /^filename$/i) {
166 $sortmeta = $doc_obj->get_source_filename();
167 } else {
168 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sort'});
169 if (defined $sortmeta && !$self->{'no_metadata_formatting'}) {
170 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sortmeta, $doc_obj);
171 }
172 }
173 $sortmeta = "" unless defined $sortmeta;
174 }
175
176 if (defined $self->{'subjectfile'}) {
177 $self->hfile_classify($doc_obj,$sortmeta,$metavalues);
178 }
179 else {
180 $self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues);
181 }
182}
183
184sub add_To_Hash {
185 my $self = shift (@_);
186 my ($myhash, $k, $nosort) = @_;
187
188 if (!defined $myhash->{$k}){
189 $myhash->{$k}={};
190 $myhash->{$k}->{'nodes'}={};
191 if ($nosort) {
192 $myhash->{$k}->{'docs'}=[];
193 } else {
194 $myhash->{$k}->{'docs'} = {};
195 }
196 }
197 return $myhash->{$k};
198}
199
200sub print_Hash{
201 my $self = shift (@_);
202 my ($myHash, $num_spaces) = @_;
203
204 foreach my $key (keys %{$myHash}){
205 print "\n";
206 $self->print_spaces($num_spaces);
207 print STDERR "$key*";
208 $self->print_Hash($myHash->{$key}, $num_spaces + 2);
209 }
210}
211
212sub print_spaces{
213 my $self = shift (@_);
214 my ($num_spaces) = @_;
215
216 for (my $i = 0; $i < $num_spaces; $i++){
217 print STDERR " ";
218 }
219}
220
221sub get_entry {
222 my $self = shift (@_);
223 my ($title, $childtype, $thistype) = @_;
224
225 # organise into classification structure
226 my %classifyinfo = ('childtype'=>$childtype,
227 'Title'=>$title,
228 'contains'=>[],);
229 $classifyinfo{'thistype'} = $thistype
230 if defined $thistype && $thistype =~ /\w/;
231
232 return \%classifyinfo;
233}
234
235sub process_hash {
236 my $self = shift (@_);
237 my ($top_hash, $top_entry) = @_;
238 my ($entry);
239
240 my $hash = {};
241 foreach my $key (sort keys %{$top_hash}) {
242 $entry = $self->get_entry($key,"VList","VList");
243 my $has_content = 0;
244 my @doc_list;
245 # generate a sorted list of doc ids
246 if (not (defined ($self->{'sort'})) && scalar(@{$top_hash->{$key}->{'docs'}})) {
247 @doc_list = @{$top_hash->{$key}->{'docs'}};
248 } elsif (defined ($self->{'sort'}) && (keys %{$top_hash->{$key}->{'docs'}})) {
249 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a}
250 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}};
251
252 }
253
254 if ($self->{'documents_last'}) {
255 # add nodes, then documents
256 # if this key has nodes, add them
257 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
258 $has_content = 1;
259 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
260 }
261
262 # if this key has documents, add them
263 if (@doc_list) {
264 $has_content = 1;
265 foreach my $d (@doc_list) {
266 push (@{$entry->{'contains'}}, {'OID'=>$d});
267 }
268 }
269
270 } else {
271 # add documents then nodes
272 # if this key has documents, add them
273 if (@doc_list) {
274 $has_content = 1;
275 foreach my $d (@doc_list) {
276 push (@{$entry->{'contains'}}, {'OID'=>$d});
277 }
278 }
279 # if this key has nodes, add them
280 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) {
281 $has_content = 1;
282 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry);
283 }
284 }
285
286 # if we have found some content, add the new entry for this key into the parent node
287 if ($has_content) {
288 push (@{$top_entry->{'contains'}}, $entry);
289 }
290
291 }
292}
293
294sub auto_get_classify_info {
295 my $self = shift (@_);
296 my ($no_thistype) = @_;
297 $no_thistype = 0 unless defined $no_thistype;
298
299 my ($classification);
300 my $top_h = $self->{'path_hash'};
301
302 if ($self->{'path_hash'}) {
303 if ($self->{'hlist_at_top'}) {
304 $classification = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
305 }
306 else {
307 $classification = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
308 }
309 }
310
311 $self->process_hash($top_h, $classification);
312
313 return $classification;
314
315}
316
317sub auto_get_classify_info
318{
319 my $self = shift (@_);
320 my ($classifyinfo) = @_;
321
322 $self->process_hash($self->{'path_hash'}, $classifyinfo);
323
324 return $classifyinfo;
325}
326
327
328sub get_classify_info {
329 my $self = shift (@_);
330
331 my ($classifyinfo);
332
333 if ($self->{'hlist_at_top'}) {
334 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "HList", "Invisible");
335 }
336 else {
337 $classifyinfo = $self->get_entry ($self->{'buttonname'}, "VList", "Invisible");
338 }
339
340 if (defined $self->{'subjectfile'}) {
341 return $self->hfile_get_classify_info($classifyinfo);
342 }
343 else {
344 return $self->auto_get_classify_info($classifyinfo);
345 }
346}
347
348
3491;
350
351
352
Note: See TracBrowser for help on using the repository browser.