source: gsdl/trunk/perllib/classify/HFileHierarchy.pm@ 17110

Last change on this file since 17110 was 12891, checked in by mdewsnip, 18 years ago

Tidied up that horrible long line in the new() function of every classifier.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.4 KB
Line 
1###########################################################################
2#
3# HFileHierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for generating hierarchical classifications
27
28
29package HFileHierarchy;
30
31use BasClas;
32use util;
33use cfgread;
34use sorttools;
35
36use strict;
37no strict 'refs'; # allow filehandles to be variables and viceversa
38
39sub BEGIN {
40 @HFileHierarchy::ISA = ('BasClas');
41}
42
43my $arguments =
44 [ { 'name' => "metadata",
45 'desc' => "{AZCompactList.metadata}",
46 'type' => "metadata",
47 'reqd' => "yes" },
48 { 'name' => "firstvalueonly",
49 'desc' => "{AZCompactList.firstvalueonly}",
50 'type' => "flag",
51 'reqd' => "no" },
52 { 'name' => "allvalues",
53 'desc' => "{AZCompactList.allvalues}",
54 'type' => "flag",
55 'reqd' => "no" },
56 { 'name' => "hfile",
57 'desc' => "{Hierarchy.hfile}",
58 'type' => "string",
59 'deft' => "",
60 'reqd' => "no" },
61 { 'name' => "sort",
62 'desc' => "{Hierarchy.sort}",
63 'type' => "metadata",
64 'reqd' => "no" },
65 { 'name' => "reverse_sort",
66 'desc' => "{Hierarchy.reverse_sort}",
67 'type' => "flag",
68 'reqd' => "no" },
69 { 'name' => "hlist_at_top",
70 'desc' => "{Hierarchy.hlist_at_top}",
71 'type' => "flag",
72 'reqd' => "no" },
73 { 'name' => "documents_last",
74 'desc' => "{Hierarchy.documents_last}",
75 'type' => "flag",
76 'reqd' => "no"}
77 ];
78
79my $options =
80{ 'name' => "HFileHierarchy",
81 'desc' => "{HFileHierarchy.desc}",
82 'abstract' => "yes",
83 'inherits' => "yes",
84 'args' => $arguments };
85
86
87sub new {
88 my ($class) = shift (@_);
89 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
90 push(@$classifierslist, $class);
91
92 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
93 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
94
95 my $self = new BasClas($classifierslist, $inputargs, $hashArgOptLists);
96
97 if ($self->{'info_only'}) {
98 # don't worry about any options etc
99 return bless $self, $class;
100 }
101
102 my $metadata = $self->{'metadata'};
103 if (!$metadata) {
104 print STDERR "$class Error: required option -metadata not supplied\n";
105 $self->print_txt_usage(""); # Use default resource bundle
106
107 die "$class Error: required option -metadata not supplied\n";
108 }
109
110 $self->{'buttonname'} = $self->generate_title_from_metadata($metadata) unless ($self->{'buttonname'});
111
112 #$self->{'metaname'} = $metadata;
113 my @meta_list = split(/,/, $metadata);
114 $self->{'meta_list'} = \@meta_list;
115
116 # sort = undef in this case is the same as sort=nosort
117 if ($self->{'sort'} eq "nosort") {
118 $self->{'sort'} = undef;
119 }
120
121 # sortname is handled a bit differently - kjdon
122 # undef means to sort, but use the metadata value from -metadata
123 # because there is no one metadata value to get for sorting when
124 # we have a list of possible metadata.
125 # To get no sorting, set sortname = 'nosort'
126 # we don't need to set it to undef if its not defined do we???
127
128# if (!$self->{'sort'}) {
129# if (defined ($metadata)) {
130# $sortname = undef;
131# } else {
132# $sortname = "nosort";
133# }
134# }
135 # $self->{'sortname'} = $sortname;
136
137 #if (defined $self->{'sort'} && $reverse_sort) {
138# $self->{'reverse_sort'} = 1;
139 # }
140
141 if ($self->{'hfile'}) {
142 my $hfile = $self->{'hfile'};
143 my $subjectfile;
144 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
145 if (!-e $subjectfile) {
146 my $collfile = $subjectfile;
147 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
148 if (!-e $subjectfile) {
149 my $outhandle = $self->{'outhandle'};
150 print STDERR "\nHFileHierarchy Error: Can't locate subject file $hfile\n";
151 print STDERR "This file should be in $collfile or $subjectfile\n";
152 $self->print_txt_usage(""); # Use default resource bundle
153 print STDERR "\nHFileHierarchy Error: Can't locate subject file $hfile\n";
154 print STDERR "This file should be in $collfile or $subjectfile\n";
155 die "\n";
156 }
157 }
158 $self->{'descriptorlist'} = {}; # first field in subject file
159 $self->{'locatorlist'} = {}; # second field in subject file
160 $self->{'subjectfile'} = $subjectfile;
161 }
162
163
164 # $self->{'firstvalueonly'} = $firstvalueonly;
165 # $self->{'allvalues'} = $allvalues;
166
167 #$self->{'hlist_at_top'} = $hlist_at_top;
168
169 # Clean out the unused keys
170 delete $self->{'metadata'};
171 delete $self->{'hfile'};
172
173 return bless $self, $class;
174}
175
176sub init {
177 my $self = shift (@_);
178
179 my $subjectfile = $self->{'subjectfile'};
180 if (defined $subjectfile) {
181 # read in the subject file
182 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\S');
183 # $list is a hash that is indexed by the descriptor. The contents of this
184 # hash is a list of two items. The first item is the OID and the second item
185 # is the title
186 foreach my $descriptor (keys (%$list)) {
187 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
188 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
189 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
190 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
191 }
192 }
193 }
194}
195
196sub hfile_classify
197{
198 my $self = shift (@_);
199 my ($doc_obj,$sortmeta,$metavalues) = @_;
200
201 my $doc_OID = $doc_obj->get_OID();
202
203 foreach my $metaelement (@$metavalues) {
204 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
205 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
206 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
207 [$doc_OID, $sortmeta]);
208 my $localid = $self->{'descriptorlist'}->{$metaelement};
209 my $classid = $self->get_number();
210
211 $doc_obj->add_metadata($doc_obj->get_top_section(), "memberof", "CL$classid.$localid");
212 }
213 }
214}
215
216
217
218
219sub hfile_get_classify_info {
220 my $self = shift (@_);
221
222 my ($classifyinfo) = @_;
223
224 my $list = $self->{'locatorlist'};
225
226 # sorted the keys - otherwise funny things happen - kjdon 03/01/03
227 foreach my $OID (sort keys (%$list)) {
228 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");
229
230 if (defined $self->{'sort'}) {
231 if ($self->{'reverse_sort'}) {
232 foreach my $subOID (sort {$b->[1] cmp $a->[1];} @{$list->{$OID}->{'contents'}}) {
233 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
234 }
235 }
236 else {
237 foreach my $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
238 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
239 }
240 }
241 }
242 else {
243 foreach my $subOID (@{$list->{$OID}->{'contents'}}) {
244 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
245 }
246 }
247 }
248
249 return $classifyinfo;
250}
251
252
253sub supports_memberof {
254 my $self = shift(@_);
255
256 return "true";
257}
258
259sub get_OID_entry {
260 my $self = shift (@_);
261 my ($OID, $classifyinfo, $title, $classifytype) = @_;
262
263 $OID = "" unless defined $OID;
264 $OID =~ s/^\.+//;
265
266 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
267 $tailOID = "" unless defined $tailOID;
268
269 if (!defined $headOID) {
270 $classifyinfo->{'Title'} = $title;
271 $classifyinfo->{'classifytype'} = $classifytype;
272 return $classifyinfo;
273 }
274
275 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
276 if ($self->{'documents_last'}) {
277 # documents should come after nodes in the classifier
278
279 my $doc_pos = 0;
280 foreach my $thing (@{$classifyinfo->{'contains'}}) {
281 last if defined $thing->{'OID'};
282 $doc_pos++;
283 }
284
285 while ($doc_pos < $headOID) {
286 splice(@{$classifyinfo->{'contains'}}, $doc_pos, 0, $self->get_entry("", $classifytype));
287 $doc_pos++;
288 }
289
290 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID-1)], $title, $classifytype);
291
292 }
293
294 # else, documents come before nodes
295 my $offset = 0;
296 foreach my $thing (@{$classifyinfo->{'contains'}}) {
297 $offset ++ if defined $thing->{'OID'};
298 }
299
300 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
301 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
302 }
303
304 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype);
305}
306
307sub get_entry {
308 my $self = shift (@_);
309 my ($title, $childtype, $thistype) = @_;
310 my $memberof = &supports_memberof();
311
312 # organise into classification structure
313 my %classifyinfo = ('childtype'=>$childtype,
314 'Title'=>$title,
315 'supportsmemberof'=>$memberof,
316 'contains'=>[]);
317 $classifyinfo{'thistype'} = $thistype
318 if defined $thistype && $thistype =~ /\w/;
319
320 return \%classifyinfo;
321}
322
323
3241;
Note: See TracBrowser for help on using the repository browser.