source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 6761

Last change on this file since 6761 was 6761, checked in by kjdon, 20 years ago

the buttonname and/or sort defaults were in English - replaced them with a key name for the strings db

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.9 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for generating hierarchical classifications
27
28# options for this classifier are:
29# -hfile file.txt - classification file
30# -metadata Metaname - metadata field to test against file.txt
31# -sort Meta - this option is optional (genious;-). by default this
32# classifier will sort documents within each section
33# alphabetically by Title. sort=nosort prevents sorting
34# (i.e. documents will end up in build order), sort=Meta
35# will sort each field alphabetically by Meta (Meta may
36# also be 'Filename' to sort by the original filename).
37# -buttonname Title - another optional field. this is what will end up in the
38# Title field for this classification. if not present it
39# defaults to Metaname
40# -hlist_at_top - use a horizontal list for the top level (i.e. display it
41# like an AZList classification)
42
43# 12/05/02 Added usage datastructure - John Thompson
44# 12/05/02 Modified new() so as not to die on error, only on init() - John Thompson
45
46package Hierarchy;
47
48use BasClas;
49use util;
50use cfgread;
51use sorttools;
52
53sub BEGIN {
54 @ISA = ('BasClas');
55}
56
57my $arguments =
58 [ { 'name' => "metadata",
59 'desc' => "{Hierarchy.metadata}",
60 'type' => "metadata",
61 'reqd' => "yes" },
62 { 'name' => "hfile",
63 'desc' => "{Hierarchy.hfile}",
64 'type' => "string",
65 'deft' => "",
66 'reqd' => "yes" },
67 { 'name' => "buttonname",
68 'desc' => "{Hierarchy.buttonname}",
69 'type' => "string",
70 'deft' => "{BasClas.metadata.deft}",
71 'reqd' => "no" },
72 { 'name' => "sort",
73 'desc' => "{Hierarchy.sort}",
74 'type' => "string",
75 'deft' => "{BasClas.metadata.deft}",
76 'reqd' => "no" },
77 { 'name' => "hlist_at_top",
78 'desc' => "{Hierarchy.hlist_at_top}",
79 'type' => "flag",
80 'reqd' => "no" } ];
81
82my $options =
83{ 'name' => "Hierarchy",
84 'desc' => "{Hierarchy.desc}",
85 'abstract' => "no",
86 'inherits' => "yes",
87 'args' => $arguments };
88
89# sub print_usage {
90# print STDERR "
91# usage: classify Hierarchy [options]
92# options:
93
94# -buttonname X Title field for this classification.
95# Defaults to metadata name.
96
97# -metadata X Metadata field used for classification,
98# list will be sorted by this element,
99# unless -sort is used.
100
101# -hfile X The classification structure file
102
103# -sort X Metadata field to sort by (defaults to -metadata)
104# use '-sort nosort' for no sorting.
105
106# -hlist_at_top Display the first level of the classification
107# horizontally.
108# ";
109# }
110
111
112sub new {
113 my $class = shift (@_);
114 my $self = new BasClas($class, @_);
115
116 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
117 my $option_list = $self->{'option_list'};
118 push( @{$option_list}, $options );
119
120 my ($hfile, $metadata, $sortname, $title, $hlist_at_top);
121
122 if (!parsargv::parse(\@_,
123 q^buttonname/.*/^, \$title,
124 q^sort/.*/^, \$sortname,
125 q^hfile/.*/^, \$hfile,
126 q^metadata/.*/^, \$metadata,
127 q^hlist_at_top^, \$hlist_at_top,
128 "allow_extra_options")) {
129
130 $self->{'construction_error'} = "Incorrect options passed to $class, check your collect.cfg file.";
131 }
132
133 if (!$metadata) {
134 $self->{'construction_error'} = "Hierarchy error: no metadata supplied.";
135 }
136
137 $title = $metadata unless ($title);
138 # if no sortname specified, it defaults to metadata
139 $sortname = $metadata unless ($sortname);
140 $sortname = undef if $sortname =~ /^nosort$/;
141 my $subjectfile;
142
143 if (!$hfile) {
144 $self->{'construction_error'} = "Hierarchy error: No -hfile supplied.";
145 }
146 else
147 {
148 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
149 if (!-e $subjectfile) {
150 my $collfile = $subjectfile;
151 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
152 if (!-e $subjectfile) {
153 my $outhandle = $self->{'outhandle'};
154 $self->print_txt_usage(""); # Use default resource bundle
155 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
156 print STDERR "This file should be in $collfile or $subjectfile\n";
157 die "\n";
158 }
159 }
160 }
161
162 $self->{'descriptorlist'} = {}; # first field in subject file
163 $self->{'locatorlist'} = {}; # second field in subject file
164 $self->{'subjectfile'} = $subjectfile;
165 $self->{'metaname'} = $metadata;
166 $self->{'sortname'} = $sortname;
167 $self->{'title'} = $title;
168 $self->{'hlist_at_top'} = $hlist_at_top;
169
170 return bless $self, $class;
171}
172
173sub init {
174 my $self = shift (@_);
175
176 if(defined $self->{'construction_error'} || !defined $self->{'metaname'} || !defined $self->{'subjectfile'}) {
177 print STDERR "Error: " , $self->{'construction_error'} , "\n";
178 $self->print_txt_usage(""); # Use default resource bundle
179 die "\n";
180 }
181
182 # read in the subject file
183 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
184 # $list is a hash that is indexed by the descriptor. The contents of this
185 # hash is a list of two items. The first item is the OID and the second item
186 # is the title
187 foreach $descriptor (keys (%$list)) {
188 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
189 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
190 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
191 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
192 }
193 }
194}
195
196sub classify {
197 my $self = shift (@_);
198 my ($doc_obj) = @_;
199
200 my $doc_OID = $doc_obj->get_OID();
201
202 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
203 $self->{'metaname'});
204
205 my $lang = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Language');
206 $lang = 'en' unless defined $lang;
207
208 my $sortmeta = "";
209 if (defined $self->{'sortname'}) {
210 if ($self->{'sortname'} =~ /^filename$/i) {
211 $sortmeta = $doc_obj->get_source_filename();
212 } else {
213 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
214 $self->{'sortname'});
215 if (defined $sortmeta) {
216 if ($self->{'sortname'} eq "Creator") {
217 if ($lang eq 'en') {
218 &sorttools::format_string_name_english (\$sortmeta);
219 }
220 } else {
221 if ($lang eq 'en') {
222 &sorttools::format_string_english (\$sortmeta);
223 }
224 }
225 }
226 }
227 $sortmeta = "" unless defined $sortmeta;
228 }
229
230 foreach $metaelement (@$metadata) {
231 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
232 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
233 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
234 [$doc_OID, $sortmeta]);
235 }
236 }
237}
238
239sub get_classify_info {
240 my $self = shift (@_);
241
242 my $list = $self->{'locatorlist'};
243
244 my ($classifyinfo);
245 if ($self->{'hlist_at_top'}) {
246 $classifyinfo = $self->get_entry ($self->{'title'}, "HList", "Invisible");
247 } else {
248 $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible");
249 }
250 # sorted the keys - otherwise funny things happen - kjdon 03/01/03
251 foreach $OID (sort keys (%$list)) {
252 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");
253
254 if (defined $self->{'sortname'}) {
255 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
256 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
257 }
258 } else {
259 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
260 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
261 }
262 }
263 }
264
265 return $classifyinfo;
266}
267
268sub get_OID_entry {
269 my $self = shift (@_);
270 my ($OID, $classifyinfo, $title, $classifytype) = @_;
271
272 $OID = "" unless defined $OID;
273 $OID =~ s/^\.+//;
274
275 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
276 $tailOID = "" unless defined $tailOID;
277
278 if (!defined $headOID) {
279 $classifyinfo->{'Title'} = $title;
280 $classifyinfo->{'classifytype'} = $classifytype;
281 return $classifyinfo;
282 }
283
284 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
285 my $offset = 0;
286 foreach $thing (@{$classifyinfo->{'contains'}}) {
287 $offset ++ if defined $thing->{'OID'};
288 }
289
290 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
291 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
292 }
293
294 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype);
295}
296
297sub get_entry {
298 my $self = shift (@_);
299 my ($title, $childtype, $thistype) = @_;
300
301 # organise into classification structure
302 my %classifyinfo = ('childtype'=>$childtype,
303 'Title'=>$title,
304 'contains'=>[]);
305 $classifyinfo{'thistype'} = $thistype
306 if defined $thistype && $thistype =~ /\w/;
307
308 return \%classifyinfo;
309}
310
311
3121;
Note: See TracBrowser for help on using the repository browser.