source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 6976

Last change on this file since 6976 was 6976, checked in by kjdon, 20 years ago

removed Johns construction error stuff so that it dies during new if metadata and hfile args are not valid

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.3 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for generating hierarchical classifications
27
28
29package Hierarchy;
30
31use BasClas;
32use util;
33use cfgread;
34use sorttools;
35
36sub BEGIN {
37 @ISA = ('BasClas');
38}
39
40my $arguments =
41 [ { 'name' => "metadata",
42 'desc' => "{Hierarchy.metadata}",
43 'type' => "metadata",
44 'reqd' => "yes" },
45 { 'name' => "hfile",
46 'desc' => "{Hierarchy.hfile}",
47 'type' => "string",
48 'deft' => "",
49 'reqd' => "yes" },
50 { 'name' => "buttonname",
51 'desc' => "{BasClas.buttonname}",
52 'type' => "string",
53 'deft' => "{BasClas.metadata.deft}",
54 'reqd' => "no" },
55 { 'name' => "sort",
56 'desc' => "{Hierarchy.sort}",
57 'type' => "string",
58 'deft' => "{BasClas.metadata.deft}",
59 'reqd' => "no" },
60 { 'name' => "hlist_at_top",
61 'desc' => "{Hierarchy.hlist_at_top}",
62 'type' => "flag",
63 'reqd' => "no" } ];
64
65my $options =
66{ 'name' => "Hierarchy",
67 'desc' => "{Hierarchy.desc}",
68 'abstract' => "no",
69 'inherits' => "yes",
70 'args' => $arguments };
71
72
73sub new {
74 my $class = shift (@_);
75 my $self = new BasClas($class, @_);
76
77 my $option_list = $self->{'option_list'};
78 push( @{$option_list}, $options );
79
80 if ($self->{'info_only'}) {
81 # created from classinfo.pl - don't need to parse the arguments
82 return bless $self, $class;
83 }
84
85 my ($hfile, $metadata, $sortname, $title, $hlist_at_top);
86
87 if (!parsargv::parse(\@_,
88 q^buttonname/.*/^, \$title,
89 q^sort/.*/^, \$sortname,
90 q^hfile/.*/^, \$hfile,
91 q^metadata/.*/^, \$metadata,
92 q^hlist_at_top^, \$hlist_at_top,
93 "allow_extra_options")) {
94
95 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
96 $self->print_txt_usage(""); # Use default resource bundle
97 die "\n";
98
99 }
100
101 if (!$metadata) {
102 print STDERR "$class: -metadata option REQUIRED \n";
103 $self->print_txt_usage(""); # Use default resource bundle
104
105 die "$class: -metadata option REQUIRED\n";
106 }
107
108 if (!$hfile) {
109 print STDERR "$class: -hfile option REQUIRED \n";
110 $self->print_txt_usage(""); # Use default resource bundle
111
112 die "$class: -hfile option REQUIRED\n";
113 }
114
115 $title = $metadata unless ($title);
116 # if no sortname specified, it defaults to metadata
117 $sortname = $metadata unless ($sortname);
118 $sortname = undef if $sortname =~ /^nosort$/;
119
120 my $subjectfile;
121 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
122 if (!-e $subjectfile) {
123 my $collfile = $subjectfile;
124 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
125 if (!-e $subjectfile) {
126 my $outhandle = $self->{'outhandle'};
127 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
128 print STDERR "This file should be in $collfile or $subjectfile\n";
129 $self->print_txt_usage(""); # Use default resource bundle
130 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
131 print STDERR "This file should be in $collfile or $subjectfile\n";
132 die "\n";
133 }
134 }
135
136 $self->{'descriptorlist'} = {}; # first field in subject file
137 $self->{'locatorlist'} = {}; # second field in subject file
138 $self->{'subjectfile'} = $subjectfile;
139 $self->{'metaname'} = $metadata;
140 $self->{'sortname'} = $sortname;
141 $self->{'title'} = $title;
142 $self->{'hlist_at_top'} = $hlist_at_top;
143
144 return bless $self, $class;
145}
146
147sub init {
148 my $self = shift (@_);
149
150 # read in the subject file
151 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
152 # $list is a hash that is indexed by the descriptor. The contents of this
153 # hash is a list of two items. The first item is the OID and the second item
154 # is the title
155 foreach $descriptor (keys (%$list)) {
156 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
157 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
158 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
159 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
160 }
161 }
162}
163
164sub classify {
165 my $self = shift (@_);
166 my ($doc_obj) = @_;
167
168 my $doc_OID = $doc_obj->get_OID();
169
170 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
171 $self->{'metaname'});
172
173 my $lang = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Language');
174 $lang = 'en' unless defined $lang;
175
176 my $sortmeta = "";
177 if (defined $self->{'sortname'}) {
178 if ($self->{'sortname'} =~ /^filename$/i) {
179 $sortmeta = $doc_obj->get_source_filename();
180 } else {
181 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
182 $self->{'sortname'});
183 if (defined $sortmeta) {
184 if ($self->{'sortname'} eq "Creator") {
185 if ($lang eq 'en') {
186 &sorttools::format_string_name_english (\$sortmeta);
187 }
188 } else {
189 if ($lang eq 'en') {
190 &sorttools::format_string_english (\$sortmeta);
191 }
192 }
193 }
194 }
195 $sortmeta = "" unless defined $sortmeta;
196 }
197
198 foreach $metaelement (@$metadata) {
199 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
200 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
201 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
202 [$doc_OID, $sortmeta]);
203 }
204 }
205}
206
207sub get_classify_info {
208 my $self = shift (@_);
209
210 my $list = $self->{'locatorlist'};
211
212 my ($classifyinfo);
213 if ($self->{'hlist_at_top'}) {
214 $classifyinfo = $self->get_entry ($self->{'title'}, "HList", "Invisible");
215 } else {
216 $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible");
217 }
218 # sorted the keys - otherwise funny things happen - kjdon 03/01/03
219 foreach $OID (sort keys (%$list)) {
220 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");
221
222 if (defined $self->{'sortname'}) {
223 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
224 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
225 }
226 } else {
227 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
228 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
229 }
230 }
231 }
232
233 return $classifyinfo;
234}
235
236sub get_OID_entry {
237 my $self = shift (@_);
238 my ($OID, $classifyinfo, $title, $classifytype) = @_;
239
240 $OID = "" unless defined $OID;
241 $OID =~ s/^\.+//;
242
243 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
244 $tailOID = "" unless defined $tailOID;
245
246 if (!defined $headOID) {
247 $classifyinfo->{'Title'} = $title;
248 $classifyinfo->{'classifytype'} = $classifytype;
249 return $classifyinfo;
250 }
251
252 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
253 my $offset = 0;
254 foreach $thing (@{$classifyinfo->{'contains'}}) {
255 $offset ++ if defined $thing->{'OID'};
256 }
257
258 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
259 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
260 }
261
262 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype);
263}
264
265sub get_entry {
266 my $self = shift (@_);
267 my ($title, $childtype, $thistype) = @_;
268
269 # organise into classification structure
270 my %classifyinfo = ('childtype'=>$childtype,
271 'Title'=>$title,
272 'contains'=>[]);
273 $classifyinfo{'thistype'} = $thistype
274 if defined $thistype && $thistype =~ /\w/;
275
276 return \%classifyinfo;
277}
278
279
2801;
Note: See TracBrowser for help on using the repository browser.