source: main/trunk/greenstone2/perllib/classify/HFileHierarchy.pm@ 23081

Last change on this file since 23081 was 23081, checked in by kjdon, 14 years ago

added a classifyOID param to get_OID_entry. Used to set the classifyOID in the classifyinfo. This is needed to keep the hfile hierarchy numbering the same as what ends up in hte classifier. Otherwise, if do remove empty classifications, then the numbering gets mucked up, and memberof doesn't work, and the user can't assign categories to the documents.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.7 KB
Line 
1###########################################################################
2#
3# HFileHierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for generating hierarchical classifications
27
28
29package HFileHierarchy;
30
31use BaseClassifier;
32use util;
33use cfgread;
34use sorttools;
35
36use strict;
37no strict 'refs'; # allow filehandles to be variables and viceversa
38
39sub BEGIN {
40 @HFileHierarchy::ISA = ('BaseClassifier');
41}
42
43my $arguments =
44 [ { 'name' => "metadata",
45 'desc' => "{AZCompactList.metadata}",
46 'type' => "metadata",
47 'reqd' => "yes" },
48 { 'name' => "firstvalueonly",
49 'desc' => "{AZCompactList.firstvalueonly}",
50 'type' => "flag",
51 'reqd' => "no" },
52 { 'name' => "allvalues",
53 'desc' => "{AZCompactList.allvalues}",
54 'type' => "flag",
55 'reqd' => "no" },
56 { 'name' => "hfile",
57 'desc' => "{Hierarchy.hfile}",
58 'type' => "string",
59 'deft' => "",
60 'reqd' => "no" },
61 { 'name' => "sort",
62 'desc' => "{Hierarchy.sort}",
63 'type' => "metadata",
64 'reqd' => "no" },
65 { 'name' => "reverse_sort",
66 'desc' => "{Hierarchy.reverse_sort}",
67 'type' => "flag",
68 'reqd' => "no" },
69 { 'name' => "hlist_at_top",
70 'desc' => "{Hierarchy.hlist_at_top}",
71 'type' => "flag",
72 'reqd' => "no" },
73 { 'name' => "documents_last",
74 'desc' => "{Hierarchy.documents_last}",
75 'type' => "flag",
76 'reqd' => "no"}
77 ];
78
79my $options =
80{ 'name' => "HFileHierarchy",
81 'desc' => "{HFileHierarchy.desc}",
82 'abstract' => "yes",
83 'inherits' => "yes",
84 'args' => $arguments };
85
86
87sub new {
88 my ($class) = shift (@_);
89 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
90 push(@$classifierslist, $class);
91
92 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
93 push(@{$hashArgOptLists->{"OptList"}},$options);
94
95 my $self = new BaseClassifier($classifierslist, $inputargs, $hashArgOptLists);
96
97 if ($self->{'info_only'}) {
98 # don't worry about any options etc
99 return bless $self, $class;
100 }
101
102 my $metadata = $self->{'metadata'};
103 if (!$metadata) {
104 print STDERR "$class Error: required option -metadata not supplied\n";
105 $self->print_txt_usage(""); # Use default resource bundle
106
107 die "$class Error: required option -metadata not supplied\n";
108 }
109
110 $self->{'buttonname'} = $self->generate_title_from_metadata($metadata) unless ($self->{'buttonname'});
111 # strip ex from metadata
112 $metadata = $self->strip_ex_from_metadata($metadata);
113
114 my @meta_list = split(/,/, $metadata);
115 $self->{'meta_list'} = \@meta_list;
116
117 # sort = undef in this case is the same as sort=nosort
118 if ($self->{'sort'} eq "nosort") {
119 $self->{'sort'} = undef;
120 }
121 if (defined $self->{'sort'}) { # remove ex. namespace
122 $self->{'sort'} = $self->strip_ex_from_metadata($self->{'sort'});
123 }
124
125 if ($self->{'hfile'}) {
126 my $hfile = $self->{'hfile'};
127 my $subjectfile;
128 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
129 if (!-e $subjectfile) {
130 my $collfile = $subjectfile;
131 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
132 if (!-e $subjectfile) {
133 my $outhandle = $self->{'outhandle'};
134 print STDERR "\nHFileHierarchy Error: Can't locate subject file $hfile\n";
135 print STDERR "This file should be in $collfile or $subjectfile\n";
136 $self->print_txt_usage(""); # Use default resource bundle
137 print STDERR "\nHFileHierarchy Error: Can't locate subject file $hfile\n";
138 print STDERR "This file should be in $collfile or $subjectfile\n";
139 die "\n";
140 }
141 }
142 $self->{'descriptorlist'} = {}; # first field in subject file
143 $self->{'locatorlist'} = {}; # second field in subject file
144 $self->{'subjectfile'} = $subjectfile;
145 }
146
147
148 # $self->{'firstvalueonly'} = $firstvalueonly;
149 # $self->{'allvalues'} = $allvalues;
150
151 #$self->{'hlist_at_top'} = $hlist_at_top;
152
153 # Clean out the unused keys
154 delete $self->{'metadata'};
155 delete $self->{'hfile'};
156
157 return bless $self, $class;
158}
159
160sub init {
161 my $self = shift (@_);
162
163 my $subjectfile = $self->{'subjectfile'};
164 if (defined $subjectfile) {
165 # read in the subject file
166 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\S');
167 # $list is a hash that is indexed by the descriptor. The contents of this
168 # hash is a list of two items. The first item is the OID and the second item
169 # is the title
170 foreach my $descriptor (keys (%$list)) {
171 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
172 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
173 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
174 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
175 }
176 }
177 }
178}
179
180sub hfile_classify
181{
182 my $self = shift (@_);
183 my ($doc_obj,$edit_mode,$sortmeta,$metavalues) = @_;
184
185 my $outhandle = $self->{'outhandle'};
186
187 my $doc_OID = $doc_obj->get_OID();
188
189 foreach my $metaelement (@$metavalues) {
190 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
191 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
192
193 if ($edit_mode eq "delete") {
194 # find it, and remove it
195 my $existing_list = $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'};
196
197 my $filtered_list = [];
198 foreach my $existing_oid_pair (@$existing_list) {
199 if ($existing_oid_pair->[0] eq $doc_OID) {
200 print $outhandle " Deleting $doc_OID for $metaelement in hierarchy\n";
201 }
202 else {
203 push(@$filtered_list,$existing_oid_pair);
204 }
205 }
206 $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'} = $filtered_list;
207 }
208 else {
209 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
210 [$doc_OID, $sortmeta]);
211 my $localid = $self->{'descriptorlist'}->{$metaelement};
212 my $classid = $self->get_number();
213
214 $doc_obj->add_metadata($doc_obj->get_top_section(), "memberof", "CL$classid.$localid");
215 }
216 }
217 }
218}
219
220
221
222
223sub hfile_get_classify_info {
224 my $self = shift (@_);
225
226 my ($classifyinfo) = @_;
227
228 my $list = $self->{'locatorlist'};
229
230 my $classifier_num = "CL".$self->get_number();
231 # sorted the keys - otherwise funny things happen - kjdon 03/01/03
232 foreach my $OID (sort keys (%$list)) {
233 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, "$classifier_num.$OID", $list->{$OID}->{'title'}, "VList");
234
235 if (defined $self->{'sort'}) {
236 if ($self->{'reverse_sort'}) {
237 foreach my $subOID (sort {$b->[1] cmp $a->[1];} @{$list->{$OID}->{'contents'}}) {
238 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
239 }
240 }
241 else {
242 foreach my $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
243 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
244 }
245 }
246 }
247 else {
248 foreach my $subOID (@{$list->{$OID}->{'contents'}}) {
249 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
250 }
251 }
252 }
253
254 return $classifyinfo;
255}
256
257
258sub supports_memberof {
259 my $self = shift(@_);
260
261 return "true";
262}
263
264sub get_OID_entry {
265 my $self = shift (@_);
266 my ($OID, $classifyinfo, $classifyOID, $title, $classifytype) = @_;
267
268 $OID = "" unless defined $OID;
269 $OID =~ s/^\.+//;
270
271 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
272 $tailOID = "" unless defined $tailOID;
273
274 if (!defined $headOID) {
275 $classifyinfo->{'Title'} = $title;
276 $classifyinfo->{'classifyOID'} = $classifyOID;
277 $classifyinfo->{'classifytype'} = $classifytype;
278 return $classifyinfo;
279 }
280
281 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
282 if ($self->{'documents_last'}) {
283 # documents should come after nodes in the classifier
284
285 my $doc_pos = 0;
286 foreach my $thing (@{$classifyinfo->{'contains'}}) {
287 last if defined $thing->{'OID'};
288 $doc_pos++;
289 }
290
291 while ($doc_pos < $headOID) {
292 splice(@{$classifyinfo->{'contains'}}, $doc_pos, 0, $self->get_entry("", $classifytype));
293 $doc_pos++;
294 }
295
296 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID-1)], $classifyOID, $title, $classifytype);
297
298 }
299
300 # else, documents come before nodes
301 my $offset = 0;
302 foreach my $thing (@{$classifyinfo->{'contains'}}) {
303 $offset ++ if defined $thing->{'OID'};
304 }
305
306 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
307 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
308 }
309
310 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $classifyOID, $title, $classifytype);
311}
312
313sub get_entry {
314 my $self = shift (@_);
315 my ($title, $childtype, $thistype) = @_;
316 my $memberof = &supports_memberof();
317
318 # organise into classification structure
319 my %classifyinfo = ('childtype'=>$childtype,
320 'Title'=>$title,
321 'supportsmemberof'=>$memberof,
322 'contains'=>[]);
323 $classifyinfo{'thistype'} = $thistype
324 if defined $thistype && $thistype =~ /\w/;
325
326 return \%classifyinfo;
327}
328
329
3301;
Note: See TracBrowser for help on using the repository browser.