source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 5646

Last change on this file since 5646 was 5645, checked in by mdewsnip, 21 years ago

Moved classifier descriptions into the resource bundle (perllib/strings.rb).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.9 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for generating hierarchical classifications
27
28# options for this classifier are:
29# -hfile file.txt - classification file
30# -metadata Metaname - metadata field to test against file.txt
31# -sort Meta - this option is optional (genious;-). by default this
32# classifier will sort documents within each section
33# alphabetically by Title. sort=nosort prevents sorting
34# (i.e. documents will end up in build order), sort=Meta
35# will sort each field alphabetically by Meta (Meta may
36# also be 'Filename' to sort by the original filename).
37# -buttonname Title - another optional field. this is what will end up in the
38# Title field for this classification. if not present it
39# defaults to Metaname
40# -hlist_at_top - use a horizontal list for the top level (i.e. display it
41# like an AZList classification)
42
43# 12/05/02 Added usage datastructure - John Thompson
44# 12/05/02 Modified new() so as not to die on error, only on init() - John Thompson
45
46package Hierarchy;
47
48use BasClas;
49use util;
50use cfgread;
51use sorttools;
52
53sub BEGIN {
54 @ISA = ('BasClas');
55}
56
57my $arguments =
58 [ { 'name' => "metadata",
59 'desc' => "{Hierarchy.metadata}",
60 'type' => "metadata",
61 'reqd' => "yes" },
62 { 'name' => "hfile",
63 'desc' => "{Hierarchy.hfile}",
64 'type' => "string",
65 'deft' => "",
66 'reqd' => "yes" },
67 { 'name' => "buttonname",
68 'desc' => "{Hierarchy.buttonname}",
69 'type' => "string",
70 'deft' => "Metadata element specified with -metadata",
71 'reqd' => "no" },
72 { 'name' => "sort",
73 'desc' => "{Hierarchy.sort}",
74 'type' => "string",
75 'deft' => "Metadata field specified with -metadata",
76 'reqd' => "no" },
77 { 'name' => "hlist_at_top",
78 'desc' => "{Hierarchy.hlist_at_top}",
79 'type' => "flag",
80 'reqd' => "no" } ];
81
82my $options =
83{ 'name' => "Hierarchy",
84 'desc' => "{Hierarchy.desc}",
85 'inherits' => "Yes" ,
86 'args' => $arguments };
87
88# sub print_usage {
89# print STDERR "
90# usage: classify Hierarchy [options]
91# options:
92
93# -buttonname X Title field for this classification.
94# Defaults to metadata name.
95
96# -metadata X Metadata field used for classification,
97# list will be sorted by this element,
98# unless -sort is used.
99
100# -hfile X The classification structure file
101
102# -sort X Metadata field to sort by (defaults to -metadata)
103# use '-sort nosort' for no sorting.
104
105# -hlist_at_top Display the first level of the classification
106# horizontally.
107# ";
108# }
109
110
111sub new {
112 my $class = shift (@_);
113 my $self = new BasClas($class, @_);
114
115 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
116 my $option_list = $self->{'option_list'};
117 push( @{$option_list}, $options );
118
119 my ($hfile, $metadata, $sortname, $title, $hlist_at_top);
120
121 if (!parsargv::parse(\@_,
122 q^buttonname/.*/^, \$title,
123 q^sort/.*/^, \$sortname,
124 q^hfile/.*/^, \$hfile,
125 q^metadata/.*/^, \$metadata,
126 q^hlist_at_top^, \$hlist_at_top,
127 "allow_extra_options")) {
128
129 $self->{'construction_error'} = "Incorrect options passed to $class, check your collect.cfg file.";
130 }
131
132 if (!$metadata) {
133 $self->{'construction_error'} = "Hierarchy error: no metadata supplied.";
134 }
135
136 $title = $metadata unless ($title);
137 # if no sortname specified, it defaults to metadata
138 $sortname = $metadata unless ($sortname);
139 $sortname = undef if $sortname =~ /^nosort$/;
140 my $subjectfile;
141
142 if (!$hfile) {
143 $self->{'construction_error'} = "Hierarchy error: No -hfile supplied.";
144 }
145 else
146 {
147 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
148 if (!-e $subjectfile) {
149 my $collfile = $subjectfile;
150 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
151 if (!-e $subjectfile) {
152 my $outhandle = $self->{'outhandle'};
153 $self->print_txt_usage(""); # Use default resource bundle
154 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
155 print STDERR "This file should be in $collfile or $subjectfile\n";
156 die "\n";
157 }
158 }
159 }
160
161 $self->{'descriptorlist'} = {}; # first field in subject file
162 $self->{'locatorlist'} = {}; # second field in subject file
163 $self->{'subjectfile'} = $subjectfile;
164 $self->{'metaname'} = $metadata;
165 $self->{'sortname'} = $sortname;
166 $self->{'title'} = $title;
167 $self->{'hlist_at_top'} = $hlist_at_top;
168
169 return bless $self, $class;
170}
171
172sub init {
173 my $self = shift (@_);
174
175 if(defined $self->{'construction_error'} || !defined $self->{'metaname'} || !defined $self->{'subjectfile'}) {
176 print STDERR "Error: " , $self->{'construction_error'} , "\n";
177 $self->print_txt_usage(""); # Use default resource bundle
178 die "\n";
179 }
180
181 # read in the subject file
182 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
183
184 # $list is a hash that is indexed by the descriptor. The contents of this
185 # hash is a list of two items. The first item is the OID and the second item
186 # is the title
187 foreach $descriptor (keys (%$list)) {
188 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
189 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
190 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
191 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
192 }
193 }
194}
195
196sub classify {
197 my $self = shift (@_);
198 my ($doc_obj) = @_;
199
200 my $doc_OID = $doc_obj->get_OID();
201
202 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
203 $self->{'metaname'});
204
205 my $lang = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Language');
206 $lang = 'en' unless defined $lang;
207
208 my $sortmeta = "";
209 if (defined $self->{'sortname'}) {
210 if ($self->{'sortname'} =~ /^filename$/i) {
211 $sortmeta = $doc_obj->get_source_filename();
212 } else {
213 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
214 $self->{'sortname'});
215 if (defined $sortmeta) {
216 if ($self->{'sortname'} eq "Creator") {
217 if ($lang eq 'en') {
218 &sorttools::format_string_name_english (\$sortmeta);
219 }
220 } else {
221 if ($lang eq 'en') {
222 &sorttools::format_string_english (\$sortmeta);
223 }
224 }
225 }
226 }
227 $sortmeta = "" unless defined $sortmeta;
228 }
229
230 foreach $metaelement (@$metadata) {
231 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
232 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
233 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
234 [$doc_OID, $sortmeta]);
235 }
236 }
237}
238
239sub get_classify_info {
240 my $self = shift (@_);
241
242 my $list = $self->{'locatorlist'};
243
244 my ($classifyinfo);
245 if ($self->{'hlist_at_top'}) {
246 $classifyinfo = $self->get_entry ($self->{'title'}, "HList", "Invisible");
247 } else {
248 $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible");
249 }
250 # sorted the keys - otherwise funny things happen - kjdon 03/01/03
251 foreach $OID (sort keys (%$list)) {
252 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");
253
254 if (defined $self->{'sortname'}) {
255 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
256 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
257 }
258 } else {
259 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
260 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
261 }
262 }
263 }
264
265 return $classifyinfo;
266}
267
268sub get_OID_entry {
269 my $self = shift (@_);
270 my ($OID, $classifyinfo, $title, $classifytype) = @_;
271
272 $OID = "" unless defined $OID;
273 $OID =~ s/^\.+//;
274
275 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
276 $tailOID = "" unless defined $tailOID;
277
278 if (!defined $headOID) {
279 $classifyinfo->{'Title'} = $title;
280 $classifyinfo->{'classifytype'} = $classifytype;
281 return $classifyinfo;
282 }
283
284 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
285 my $offset = 0;
286 foreach $thing (@{$classifyinfo->{'contains'}}) {
287 $offset ++ if defined $thing->{'OID'};
288 }
289
290 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
291 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
292 }
293
294 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype);
295}
296
297sub get_entry {
298 my $self = shift (@_);
299 my ($title, $childtype, $thistype) = @_;
300
301 # organise into classification structure
302 my %classifyinfo = ('childtype'=>$childtype,
303 'Title'=>$title,
304 'contains'=>[]);
305 $classifyinfo{'thistype'} = $thistype
306 if defined $thistype && $thistype =~ /\w/;
307
308 return \%classifyinfo;
309}
310
311
3121;
Note: See TracBrowser for help on using the repository browser.