source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 5646

Last change on this file since 5646 was 5645, checked in by mdewsnip, 21 years ago

Moved classifier descriptions into the resource bundle (perllib/strings.rb).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.9 KB
RevLine 
[537]1###########################################################################
2#
3# Hierarchy.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
[231]26# classifier plugin for generating hierarchical classifications
27
[384]28# options for this classifier are:
[1947]29# -hfile file.txt - classification file
30# -metadata Metaname - metadata field to test against file.txt
31# -sort Meta - this option is optional (genious;-). by default this
[427]32# classifier will sort documents within each section
33# alphabetically by Title. sort=nosort prevents sorting
34# (i.e. documents will end up in build order), sort=Meta
35# will sort each field alphabetically by Meta (Meta may
36# also be 'Filename' to sort by the original filename).
[2022]37# -buttonname Title - another optional field. this is what will end up in the
[427]38# Title field for this classification. if not present it
39# defaults to Metaname
[2837]40# -hlist_at_top - use a horizontal list for the top level (i.e. display it
41# like an AZList classification)
[384]42
[3540]43# 12/05/02 Added usage datastructure - John Thompson
44# 12/05/02 Modified new() so as not to die on error, only on init() - John Thompson
45
[231]46package Hierarchy;
47
[1483]48use BasClas;
[231]49use util;
50use cfgread;
[384]51use sorttools;
[231]52
[1483]53sub BEGIN {
54 @ISA = ('BasClas');
55}
56
[4759]57my $arguments =
58 [ { 'name' => "metadata",
[4873]59 'desc' => "{Hierarchy.metadata}",
[3540]60 'type' => "metadata",
[4759]61 'reqd' => "yes" },
[4873]62 { 'name' => "hfile",
63 'desc' => "{Hierarchy.hfile}",
64 'type' => "string",
65 'deft' => "",
66 'reqd' => "yes" },
[4759]67 { 'name' => "buttonname",
[4873]68 'desc' => "{Hierarchy.buttonname}",
[3540]69 'type' => "string",
[4759]70 'deft' => "Metadata element specified with -metadata",
71 'reqd' => "no" },
72 { 'name' => "sort",
[4873]73 'desc' => "{Hierarchy.sort}",
[3540]74 'type' => "string",
[4759]75 'deft' => "Metadata field specified with -metadata",
76 'reqd' => "no" },
77 { 'name' => "hlist_at_top",
[4873]78 'desc' => "{Hierarchy.hlist_at_top}",
[3540]79 'type' => "flag",
[4759]80 'reqd' => "no" } ];
[3540]81
82my $options =
83{ 'name' => "Hierarchy",
[5645]84 'desc' => "{Hierarchy.desc}",
[3540]85 'inherits' => "Yes" ,
86 'args' => $arguments };
87
[4786]88# sub print_usage {
89# print STDERR "
90# usage: classify Hierarchy [options]
91# options:
[1839]92
[4786]93# -buttonname X Title field for this classification.
94# Defaults to metadata name.
[1839]95
[4786]96# -metadata X Metadata field used for classification,
97# list will be sorted by this element,
98# unless -sort is used.
[1839]99
[4786]100# -hfile X The classification structure file
[1839]101
[4786]102# -sort X Metadata field to sort by (defaults to -metadata)
103# use '-sort nosort' for no sorting.
[2837]104
[4786]105# -hlist_at_top Display the first level of the classification
106# horizontally.
107# ";
108# }
[1839]109
110
[231]111sub new {
[1839]112 my $class = shift (@_);
113 my $self = new BasClas($class, @_);
[3540]114
[3639]115 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
116 my $option_list = $self->{'option_list'};
117 push( @{$option_list}, $options );
[3540]118
[3639]119 my ($hfile, $metadata, $sortname, $title, $hlist_at_top);
120
[1839]121 if (!parsargv::parse(\@_,
[2022]122 q^buttonname/.*/^, \$title,
[3639]123 q^sort/.*/^, \$sortname,
[1839]124 q^hfile/.*/^, \$hfile,
125 q^metadata/.*/^, \$metadata,
[2837]126 q^hlist_at_top^, \$hlist_at_top,
[1839]127 "allow_extra_options")) {
128
[3639]129 $self->{'construction_error'} = "Incorrect options passed to $class, check your collect.cfg file.";
[427]130 }
131
[1839]132 if (!$metadata) {
[3639]133 $self->{'construction_error'} = "Hierarchy error: no metadata supplied.";
[231]134 }
[3639]135
[1839]136 $title = $metadata unless ($title);
[3639]137 # if no sortname specified, it defaults to metadata
138 $sortname = $metadata unless ($sortname);
[1839]139 $sortname = undef if $sortname =~ /^nosort$/;
[3639]140 my $subjectfile;
[1839]141
142 if (!$hfile) {
[3639]143 $self->{'construction_error'} = "Hierarchy error: No -hfile supplied.";
[1839]144 }
[3540]145 else
[3639]146 {
147 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
148 if (!-e $subjectfile) {
149 my $collfile = $subjectfile;
150 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
151 if (!-e $subjectfile) {
152 my $outhandle = $self->{'outhandle'};
[4873]153 $self->print_txt_usage(""); # Use default resource bundle
[3639]154 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
155 print STDERR "This file should be in $collfile or $subjectfile\n";
156 die "\n";
157 }
158 }
159 }
160
[1483]161 $self->{'descriptorlist'} = {}; # first field in subject file
162 $self->{'locatorlist'} = {}; # second field in subject file
163 $self->{'subjectfile'} = $subjectfile;
164 $self->{'metaname'} = $metadata;
165 $self->{'sortname'} = $sortname;
166 $self->{'title'} = $title;
[2837]167 $self->{'hlist_at_top'} = $hlist_at_top;
[1483]168
169 return bless $self, $class;
[231]170}
171
172sub init {
173 my $self = shift (@_);
[3639]174
175 if(defined $self->{'construction_error'} || !defined $self->{'metaname'} || !defined $self->{'subjectfile'}) {
176 print STDERR "Error: " , $self->{'construction_error'} , "\n";
[4873]177 $self->print_txt_usage(""); # Use default resource bundle
[3639]178 die "\n";
179 }
180
[231]181 # read in the subject file
[253]182 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
[231]183
[316]184 # $list is a hash that is indexed by the descriptor. The contents of this
185 # hash is a list of two items. The first item is the OID and the second item
186 # is the title
[231]187 foreach $descriptor (keys (%$list)) {
188 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
189 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
190 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
191 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
192 }
193 }
194}
195
196sub classify {
197 my $self = shift (@_);
198 my ($doc_obj) = @_;
[3639]199
[231]200 my $doc_OID = $doc_obj->get_OID();
[3639]201
[231]202 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
203 $self->{'metaname'});
[3639]204
[3719]205 my $lang = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Language');
[3728]206 $lang = 'en' unless defined $lang;
[3719]207
[427]208 my $sortmeta = "";
209 if (defined $self->{'sortname'}) {
210 if ($self->{'sortname'} =~ /^filename$/i) {
211 $sortmeta = $doc_obj->get_source_filename();
212 } else {
213 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
214 $self->{'sortname'});
[641]215 if (defined $sortmeta) {
216 if ($self->{'sortname'} eq "Creator") {
[3719]217 if ($lang eq 'en') {
218 &sorttools::format_string_name_english (\$sortmeta);
219 }
[641]220 } else {
[3719]221 if ($lang eq 'en') {
222 &sorttools::format_string_english (\$sortmeta);
223 }
[641]224 }
[427]225 }
226 }
227 $sortmeta = "" unless defined $sortmeta;
228 }
[3639]229
[231]230 foreach $metaelement (@$metadata) {
231 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
232 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
[384]233 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
234 [$doc_OID, $sortmeta]);
[231]235 }
236 }
237}
238
[316]239sub get_classify_info {
[231]240 my $self = shift (@_);
[316]241
[231]242 my $list = $self->{'locatorlist'};
[3639]243
[2837]244 my ($classifyinfo);
245 if ($self->{'hlist_at_top'}) {
246 $classifyinfo = $self->get_entry ($self->{'title'}, "HList", "Invisible");
247 } else {
248 $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible");
249 }
[3639]250 # sorted the keys - otherwise funny things happen - kjdon 03/01/03
251 foreach $OID (sort keys (%$list)) {
[677]252 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");
[3639]253
[384]254 if (defined $self->{'sortname'}) {
255 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
256 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
257 }
258 } else {
259 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
260 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
261 }
[316]262 }
[231]263 }
[384]264
[316]265 return $classifyinfo;
[231]266}
267
[316]268sub get_OID_entry {
[231]269 my $self = shift (@_);
[316]270 my ($OID, $classifyinfo, $title, $classifytype) = @_;
271
272 $OID = "" unless defined $OID;
273 $OID =~ s/^\.+//;
[1608]274
[316]275 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
276 $tailOID = "" unless defined $tailOID;
277
278 if (!defined $headOID) {
279 $classifyinfo->{'Title'} = $title;
280 $classifyinfo->{'classifytype'} = $classifytype;
281 return $classifyinfo;
[231]282 }
[316]283
284 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
[2973]285 my $offset = 0;
286 foreach $thing (@{$classifyinfo->{'contains'}}) {
287 $offset ++ if defined $thing->{'OID'};
288 }
[3540]289
290 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
[316]291 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
292 }
293
[2973]294 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype);
[231]295}
296
[316]297sub get_entry {
298 my $self = shift (@_);
[677]299 my ($title, $childtype, $thistype) = @_;
[1608]300
[316]301 # organise into classification structure
[677]302 my %classifyinfo = ('childtype'=>$childtype,
[316]303 'Title'=>$title,
304 'contains'=>[]);
[677]305 $classifyinfo{'thistype'} = $thistype
306 if defined $thistype && $thistype =~ /\w/;
[231]307
[316]308 return \%classifyinfo;
309}
310
[384]311
[231]3121;
Note: See TracBrowser for help on using the repository browser.