source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 2022

Last change on this file since 2022 was 2022, checked in by sjboddie, 23 years ago

Caught some of the classifiers up with the documentation (finally). The
old "title" option has been replaced with the "buttonname" option.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1###########################################################################
2#
3# Hierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for generating hierarchical classifications
27
28# options for this classifier are:
29# -hfile file.txt - classification file
30# -metadata Metaname - metadata field to test against file.txt
31# -sort Meta - this option is optional (genious;-). by default this
32# classifier will sort documents within each section
33# alphabetically by Title. sort=nosort prevents sorting
34# (i.e. documents will end up in build order), sort=Meta
35# will sort each field alphabetically by Meta (Meta may
36# also be 'Filename' to sort by the original filename).
37# -buttonname Title - another optional field. this is what will end up in the
38# Title field for this classification. if not present it
39# defaults to Metaname
40
41package Hierarchy;
42
43use BasClas;
44use util;
45use cfgread;
46use sorttools;
47
48sub BEGIN {
49 @ISA = ('BasClas');
50}
51
52sub print_usage {
53 print STDERR "
54 usage: classify Hierarchy [options]
55 options:
56
57 -buttonname X Title field for this classification.
58 Defaults to metadata name.
59
60 -metadata X Metadata field used for classification,
61 list will be sorted by this element.
62
63 -hfile X The classification structure file
64
65 -sort X Metadata field to sort by (defaults to none)
66";
67}
68
69
70sub new {
71 my $class = shift (@_);
72 my $self = new BasClas($class, @_);
73
74 my $sortname = "Title";
75 my ($hfile, $metadata, $title);
76
77 if (!parsargv::parse(\@_,
78 q^buttonname/.*/^, \$title,
79 q^sort/.*/nosort^, \$sortname,
80 q^hfile/.*/^, \$hfile,
81 q^metadata/.*/^, \$metadata,
82 "allow_extra_options")) {
83
84 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
85 &print_usage();
86 die "\n";
87 }
88
89 if (!$metadata) {
90 &print_usage;
91 print STDERR "\nHierarchy error: no metadata supplied\n";
92 die "\n";
93 }
94
95 $title = $metadata unless ($title);
96
97 $sortname = undef if $sortname =~ /^nosort$/;
98
99 if (!$hfile) {
100 &print_usage;
101 print STDERR "\nHierarchy error: No -hfile supplied\n";
102 die "\n";
103 }
104
105 my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
106 if (!-e $subjectfile) {
107 my $collfile = $subjectfile;
108 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
109 if (!-e $subjectfile) {
110 my $outhandle = $self->{'outhandle'};
111 &print_usage;
112 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
113 print STDERR "This file should be in $collfile or $subjectfile\n";
114 die "\n";
115 }
116 }
117
118 $self->{'descriptorlist'} = {}; # first field in subject file
119 $self->{'locatorlist'} = {}; # second field in subject file
120 $self->{'subjectfile'} = $subjectfile;
121 $self->{'metaname'} = $metadata;
122 $self->{'sortname'} = $sortname;
123 $self->{'title'} = $title;
124
125 return bless $self, $class;
126}
127
128sub init {
129 my $self = shift (@_);
130
131 # read in the subject file
132 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
133
134 # $list is a hash that is indexed by the descriptor. The contents of this
135 # hash is a list of two items. The first item is the OID and the second item
136 # is the title
137 foreach $descriptor (keys (%$list)) {
138 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
139 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
140 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
141 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
142 }
143 }
144}
145
146sub classify {
147 my $self = shift (@_);
148 my ($doc_obj) = @_;
149
150 my $doc_OID = $doc_obj->get_OID();
151
152 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
153 $self->{'metaname'});
154
155 my $sortmeta = "";
156 if (defined $self->{'sortname'}) {
157 if ($self->{'sortname'} =~ /^filename$/i) {
158 $sortmeta = $doc_obj->get_source_filename();
159 } else {
160 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
161 $self->{'sortname'});
162 if (defined $sortmeta) {
163 if ($self->{'sortname'} eq "Creator") {
164 &sorttools::format_string_name_english (\$sortmeta);
165 } else {
166 &sorttools::format_string_english (\$sortmeta);
167 }
168 }
169 }
170 $sortmeta = "" unless defined $sortmeta;
171 }
172
173 foreach $metaelement (@$metadata) {
174 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
175 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
176 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
177 [$doc_OID, $sortmeta]);
178 }
179 }
180}
181
182sub get_classify_info {
183 my $self = shift (@_);
184
185 my $list = $self->{'locatorlist'};
186
187 my $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible");
188 foreach $OID (keys (%$list)) {
189
190 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");
191
192 if (defined $self->{'sortname'}) {
193 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
194 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
195 }
196 } else {
197 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
198 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
199 }
200 }
201 }
202
203 return $classifyinfo;
204}
205
206sub get_OID_entry {
207 my $self = shift (@_);
208 my ($OID, $classifyinfo, $title, $classifytype) = @_;
209
210 $OID = "" unless defined $OID;
211 $OID =~ s/^\.+//;
212
213 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
214 $tailOID = "" unless defined $tailOID;
215
216 if (!defined $headOID) {
217 $classifyinfo->{'Title'} = $title;
218 $classifyinfo->{'classifytype'} = $classifytype;
219 return $classifyinfo;
220 }
221
222 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
223
224 # add entries to the contains list until we have one for headOID
225 #### the +10 is a hack that works but I'm not completely sure why
226 #### and don't have time to delve deeper. one day someone should
227 #### fix this ;-) -- Stefan
228 while (scalar(@{$classifyinfo->{'contains'}}) <= ($headOID+10)) {
229 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
230 }
231
232 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID-1)], $title, $classifytype);
233}
234
235sub get_entry {
236 my $self = shift (@_);
237 my ($title, $childtype, $thistype) = @_;
238
239 # organise into classification structure
240 my %classifyinfo = ('childtype'=>$childtype,
241 'Title'=>$title,
242 'contains'=>[]);
243 $classifyinfo{'thistype'} = $thistype
244 if defined $thistype && $thistype =~ /\w/;
245
246 return \%classifyinfo;
247}
248
249
2501;
Note: See TracBrowser for help on using the repository browser.