source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 7405

Last change on this file since 7405 was 6984, checked in by kjdon, 20 years ago

changed some error messages

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for generating hierarchical classifications
27
28
29package Hierarchy;
30
31use BasClas;
32use util;
33use cfgread;
34use sorttools;
35
36sub BEGIN {
37 @ISA = ('BasClas');
38}
39
40my $arguments =
41 [ { 'name' => "metadata",
42 'desc' => "{Hierarchy.metadata}",
43 'type' => "metadata",
44 'reqd' => "yes" },
45 { 'name' => "hfile",
46 'desc' => "{Hierarchy.hfile}",
47 'type' => "string",
48 'deft' => "",
49 'reqd' => "yes" },
50 { 'name' => "buttonname",
51 'desc' => "{BasClas.buttonname}",
52 'type' => "string",
53 'deft' => "{BasClas.metadata.deft}",
54 'reqd' => "no" },
55 { 'name' => "sort",
56 'desc' => "{Hierarchy.sort}",
57 'type' => "string",
58 'deft' => "{BasClas.metadata.deft}",
59 'reqd' => "no" },
60 { 'name' => "hlist_at_top",
61 'desc' => "{Hierarchy.hlist_at_top}",
62 'type' => "flag",
63 'reqd' => "no" } ];
64
65my $options =
66{ 'name' => "Hierarchy",
67 'desc' => "{Hierarchy.desc}",
68 'abstract' => "no",
69 'inherits' => "yes",
70 'args' => $arguments };
71
72
73sub new {
74 my $class = shift (@_);
75 my $self = new BasClas($class, @_);
76
77 my $option_list = $self->{'option_list'};
78 push( @{$option_list}, $options );
79
80 if ($self->{'info_only'}) {
81 # created from classinfo.pl - don't need to parse the arguments
82 return bless $self, $class;
83 }
84
85 my ($hfile, $metadata, $sortname, $title, $hlist_at_top);
86
87 if (!parsargv::parse(\@_,
88 q^buttonname/.*/^, \$title,
89 q^sort/.*/^, \$sortname,
90 q^hfile/.*/^, \$hfile,
91 q^metadata/.*/^, \$metadata,
92 q^hlist_at_top^, \$hlist_at_top,
93 "allow_extra_options")) {
94
95 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
96 $self->print_txt_usage(""); # Use default resource bundle
97 die "\n";
98
99 }
100
101 if (!$metadata) {
102 print STDERR "$class Error: required option -metadata not supplied\n";
103 $self->print_txt_usage(""); # Use default resource bundle
104
105 die "$class Error: required option -metadata not supplied\n";
106 }
107
108 if (!$hfile) {
109 print STDERR "$class Error: required option -hfile not supplied\n";
110 $self->print_txt_usage(""); # Use default resource bundle
111
112 die "$class Error: required option -hfile not supplied\n";
113 }
114
115 $title = $metadata unless ($title);
116 # if no sortname specified, it defaults to metadata
117 $sortname = $metadata unless ($sortname);
118 $sortname = undef if $sortname =~ /^nosort$/;
119
120 my $subjectfile;
121 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
122 if (!-e $subjectfile) {
123 my $collfile = $subjectfile;
124 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
125 if (!-e $subjectfile) {
126 my $outhandle = $self->{'outhandle'};
127 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
128 print STDERR "This file should be in $collfile or $subjectfile\n";
129 $self->print_txt_usage(""); # Use default resource bundle
130 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
131 print STDERR "This file should be in $collfile or $subjectfile\n";
132 die "\n";
133 }
134 }
135
136 $self->{'descriptorlist'} = {}; # first field in subject file
137 $self->{'locatorlist'} = {}; # second field in subject file
138 $self->{'subjectfile'} = $subjectfile;
139 $self->{'metaname'} = $metadata;
140 $self->{'sortname'} = $sortname;
141 $self->{'title'} = $title;
142 $self->{'hlist_at_top'} = $hlist_at_top;
143
144 return bless $self, $class;
145}
146
147sub init {
148 my $self = shift (@_);
149
150 # read in the subject file
151 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
152 # $list is a hash that is indexed by the descriptor. The contents of this
153 # hash is a list of two items. The first item is the OID and the second item
154 # is the title
155 foreach $descriptor (keys (%$list)) {
156 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
157 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
158 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
159 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
160 }
161 }
162}
163
164sub classify {
165 my $self = shift (@_);
166 my ($doc_obj) = @_;
167
168 my $doc_OID = $doc_obj->get_OID();
169
170 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
171 $self->{'metaname'});
172
173 my $lang = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Language');
174 $lang = 'en' unless defined $lang;
175
176 my $sortmeta = "";
177 if (defined $self->{'sortname'}) {
178 if ($self->{'sortname'} =~ /^filename$/i) {
179 $sortmeta = $doc_obj->get_source_filename();
180 } else {
181 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
182 $self->{'sortname'});
183 if (defined $sortmeta) {
184 if ($self->{'sortname'} eq "Creator") {
185 if ($lang eq 'en') {
186 &sorttools::format_string_name_english (\$sortmeta);
187 }
188 } else {
189 if ($lang eq 'en') {
190 &sorttools::format_string_english (\$sortmeta);
191 }
192 }
193 }
194 }
195 $sortmeta = "" unless defined $sortmeta;
196 }
197
198 foreach $metaelement (@$metadata) {
199 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
200 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
201 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
202 [$doc_OID, $sortmeta]);
203 }
204 }
205}
206
207sub get_classify_info {
208 my $self = shift (@_);
209
210 my $list = $self->{'locatorlist'};
211
212 my ($classifyinfo);
213 if ($self->{'hlist_at_top'}) {
214 $classifyinfo = $self->get_entry ($self->{'title'}, "HList", "Invisible");
215 } else {
216 $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible");
217 }
218 # sorted the keys - otherwise funny things happen - kjdon 03/01/03
219 foreach $OID (sort keys (%$list)) {
220 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");
221
222 if (defined $self->{'sortname'}) {
223 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
224 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
225 }
226 } else {
227 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
228 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
229 }
230 }
231 }
232
233 return $classifyinfo;
234}
235
236sub get_OID_entry {
237 my $self = shift (@_);
238 my ($OID, $classifyinfo, $title, $classifytype) = @_;
239
240 $OID = "" unless defined $OID;
241 $OID =~ s/^\.+//;
242
243 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
244 $tailOID = "" unless defined $tailOID;
245
246 if (!defined $headOID) {
247 $classifyinfo->{'Title'} = $title;
248 $classifyinfo->{'classifytype'} = $classifytype;
249 return $classifyinfo;
250 }
251
252 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
253 my $offset = 0;
254 foreach $thing (@{$classifyinfo->{'contains'}}) {
255 $offset ++ if defined $thing->{'OID'};
256 }
257
258 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
259 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
260 }
261
262 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype);
263}
264
265sub get_entry {
266 my $self = shift (@_);
267 my ($title, $childtype, $thistype) = @_;
268
269 # organise into classification structure
270 my %classifyinfo = ('childtype'=>$childtype,
271 'Title'=>$title,
272 'contains'=>[]);
273 $classifyinfo{'thistype'} = $thistype
274 if defined $thistype && $thistype =~ /\w/;
275
276 return \%classifyinfo;
277}
278
279
2801;
Note: See TracBrowser for help on using the repository browser.