source: trunk/gsdl/perllib/classify/HFileHierarchy.pm@ 9206

Last change on this file since 9206 was 9206, checked in by davidb, 19 years ago

Reworking of AutoHierarchy and Hierarchy so they are merged as one "super"
hierarchy classifier that is backward compatible.

The original Hierarchy is now HFileHierarchy. This is now an abstract
class that encapsulates everything needed to use the -hfile option.

AutoHierarchy has now been renamed to Hierarchy. Classifiers options between
the two have been merged and kept backwards compatible. If a user specifies
-hfile to Hierarhcy it is patched through to HFileHierarchy to do things
the way they used to be done. If no -hfile flag is specified then the
newer "auto" ability is used.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.1 KB
Line 
1###########################################################################
2#
3# HFileHierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for generating hierarchical classifications
27
28
29package HFileHierarchy;
30
31use BasClas;
32use util;
33use cfgread;
34use sorttools;
35
36sub BEGIN {
37 @HFileHierarchy::ISA = ('BasClas');
38}
39
40my $arguments =
41 [ { 'name' => "metadata",
42 'desc' => "{AZCompactList.metadata}",
43 'type' => "metadata",
44 'reqd' => "yes" },
45 { 'name' => "firstvalueonly",
46 'desc' => "{AZCompactList.firstvalueonly}",
47 'type' => "flag",
48 'reqd' => "no" },
49 { 'name' => "allvalues",
50 'desc' => "{AZCompactList.allvalues}",
51 'type' => "flag",
52 'reqd' => "no" },
53 { 'name' => "sort",
54 'desc' => "{Hierarchy.sort}",
55 'type' => "string",
56 'deft' => "{BasClas.metadata.deft}",
57 'reqd' => "no" },
58 { 'name' => "hfile",
59 'desc' => "{Hierarchy.hfile}",
60 'type' => "string",
61 'deft' => "",
62 'reqd' => "no" },
63 { 'name' => "buttonname",
64 'desc' => "{BasClas.buttonname}",
65 'type' => "string",
66 'deft' => "{BasClas.metadata.deft}",
67 'reqd' => "no" },
68 { 'name' => "sort",
69 'desc' => "{Hierarchy.sort}",
70 'type' => "string",
71 'deft' => "{BasClas.metadata.deft}",
72 'reqd' => "no" },
73 { 'name' => "reverse_sort",
74 'desc' => "{Hierarchy.reverse_sort}",
75 'type' => "flag",
76 'reqd' => "no" },
77 { 'name' => "hlist_at_top",
78 'desc' => "{Hierarchy.hlist_at_top}",
79 'type' => "flag",
80 'reqd' => "no" } ];
81
82my $options =
83{ 'name' => "HFileHierarchy",
84 'desc' => "{Hierarchy.desc}",
85 'abstract' => "yes",
86 'inherits' => "yes",
87 'args' => $arguments };
88
89
90sub new {
91 my $class = shift (@_);
92 my $self = new BasClas($class, @_);
93
94 my $option_list = $self->{'option_list'};
95 push( @{$option_list}, $options );
96
97 if ($self->{'info_only'}) {
98 # created from classinfo.pl - don't need to parse the arguments
99 return bless $self, $class;
100 }
101
102 my ($hfile, $metadata, $buttonname, $sortname, $reverse_sort, $title,
103 $hlist_at_top);
104 my $firstvalueonly = 0;
105 my $allvalues = 0;
106
107 if (!parsargv::parse(\@_,
108 q^metadata/.*/^, \$metadata,
109 q^buttonname/.*/^, \$buttonname,
110 q^firstvalueonly^, \$firstvalueonly,
111 q^allvalues^, \$allvalues,
112 q^sort/.*/^, \$sortname,
113 q^reverse_sort^, \$reverse_sort,
114 q^hfile/.*/^, \$hfile,
115 q^hlist_at_top^, \$hlist_at_top,
116 "allow_extra_options")) {
117
118 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
119 $self->print_txt_usage(""); # Use default resource bundle
120 die "\n";
121
122 }
123
124 if (!$metadata) {
125 print STDERR "$class Error: required option -metadata not supplied\n";
126 $self->print_txt_usage(""); # Use default resource bundle
127
128 die "$class Error: required option -metadata not supplied\n";
129 }
130
131 $buttonname = $self->generate_title_from_metadata($metadata) unless ($buttonname);
132 $title = $buttonname;
133 $self->{'title'} = $title;
134
135 $self->{'metaname'} = $metadata;
136 my @meta_list = split(/,/, $metadata);
137 $self->{'meta_list'} = \@meta_list;
138
139 # sortname is handled a bit differently - kjdon
140 # undef means to sort, but use the metadata value from -metadata
141 # because there is no one metadata value to get for sorting when
142 # we have a list of possible metadata.
143 # To get no sorting, set sortname = 'nosort'
144 if (!$sortname) {
145 if (defined ($metadata)) {
146 $sortname = undef;
147 } else {
148 $sortname = "nosort";
149 }
150 }
151 $self->{'sortname'} = $sortname;
152
153 if (defined $sortname && $reverse_sort) {
154 $self->{'reverse_sort'} = 1;
155 }
156
157 if ($hfile) {
158 my $subjectfile;
159 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
160 if (!-e $subjectfile) {
161 my $collfile = $subjectfile;
162 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
163 if (!-e $subjectfile) {
164 my $outhandle = $self->{'outhandle'};
165 print STDERR "\nHFileHierarchy Error: Can't locate subject file $hfile\n";
166 print STDERR "This file should be in $collfile or $subjectfile\n";
167 $self->print_txt_usage(""); # Use default resource bundle
168 print STDERR "\nHFileHierarchy Error: Can't locate subject file $hfile\n";
169 print STDERR "This file should be in $collfile or $subjectfile\n";
170 die "\n";
171 }
172 }
173 $self->{'descriptorlist'} = {}; # first field in subject file
174 $self->{'locatorlist'} = {}; # second field in subject file
175 $self->{'subjectfile'} = $subjectfile;
176 }
177
178
179 $self->{'firstvalueonly'} = $firstvalueonly;
180 $self->{'allvalues'} = $allvalues;
181
182 $self->{'hlist_at_top'} = $hlist_at_top;
183
184
185 return bless $self, $class;
186}
187
188sub init {
189 my $self = shift (@_);
190
191 my $subjectfile = $self->{'subjectfile'};
192 if (defined $subjectfile) {
193 # read in the subject file
194 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\S');
195 # $list is a hash that is indexed by the descriptor. The contents of this
196 # hash is a list of two items. The first item is the OID and the second item
197 # is the title
198 foreach $descriptor (keys (%$list)) {
199 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
200 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
201 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
202 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
203 }
204 }
205 }
206}
207
208sub hfile_classify
209{
210 my $self = shift (@_);
211 my ($doc_obj,$sortmeta,$metavalues) = @_;
212
213 my $doc_OID = $doc_obj->get_OID();
214
215 foreach my $metaelement (@$metavalues) {
216 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
217 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
218 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
219 [$doc_OID, $sortmeta]);
220 my $localid = $self->{'descriptorlist'}->{$metaelement};
221 my $classid = $self->get_number();
222
223 $doc_obj->add_metadata($doc_obj->get_top_section(), "memberof", "CL$classid.$localid");
224 }
225 }
226}
227
228
229
230
231sub hfile_get_classify_info {
232 my $self = shift (@_);
233
234 my ($classifyinfo) = @_;
235
236 my $list = $self->{'locatorlist'};
237
238 # sorted the keys - otherwise funny things happen - kjdon 03/01/03
239 foreach $OID (sort keys (%$list)) {
240 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");
241
242 if (defined $self->{'sortname'}) {
243 if ($self->{'reverse_sort'}) {
244 foreach $subOID (sort {$b->[1] cmp $a->[1];} @{$list->{$OID}->{'contents'}}) {
245 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
246 }
247 }
248 else {
249 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
250 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
251 }
252 }
253 }
254 else {
255 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
256 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
257 }
258 }
259 }
260
261 return $classifyinfo;
262}
263
264
265sub supports_memberof {
266 my $self = shift(@_);
267
268 return "true";
269}
270
271sub get_OID_entry {
272 my $self = shift (@_);
273 my ($OID, $classifyinfo, $title, $classifytype) = @_;
274
275 $OID = "" unless defined $OID;
276 $OID =~ s/^\.+//;
277
278 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
279 $tailOID = "" unless defined $tailOID;
280
281 if (!defined $headOID) {
282 $classifyinfo->{'Title'} = $title;
283 $classifyinfo->{'classifytype'} = $classifytype;
284 return $classifyinfo;
285 }
286
287 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
288 my $offset = 0;
289 foreach my $thing (@{$classifyinfo->{'contains'}}) {
290 $offset ++ if defined $thing->{'OID'};
291 }
292
293 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
294 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
295 }
296
297 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype);
298}
299
300sub get_entry {
301 my $self = shift (@_);
302 my ($title, $childtype, $thistype) = @_;
303 my $memberof = &supports_memberof();
304
305 # organise into classification structure
306 my %classifyinfo = ('childtype'=>$childtype,
307 'Title'=>$title,
308 'supportsmemberof'=>$memberof,
309 'contains'=>[]);
310 $classifyinfo{'thistype'} = $thistype
311 if defined $thistype && $thistype =~ /\w/;
312
313 return \%classifyinfo;
314}
315
316
3171;
Note: See TracBrowser for help on using the repository browser.