source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 384

Last change on this file since 384 was 384, checked in by sjboddie, 25 years ago

added a sort option

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.0 KB
Line 
1# classifier plugin for generating hierarchical classifications
2
3# options for this classifier are:
4# file.txt - classification file
5# Metaname - metadata field to test against file.txt
6# sort=Meta - this option is optional (genious;-). by default this
7# classifier will sort documents within each section
8# alphabetically by Title. sort=nosort prevents sorting
9# (i.e. documents will end up in build order), sort=Meta
10# will sort each field alphabetically by Meta (Meta may
11# also be 'Filename' to sort by the original filename).
12
13
14package Hierarchy;
15
16use util;
17use cfgread;
18use sorttools;
19
20sub new {
21 my ($class, @options) = @_;
22
23 if (!defined @options || (scalar @options < 2)) {
24 die "Error in options passed to Hierarchy classification\n" .
25 "Usage: Hierarchy subjectfile metadata\n";
26 }
27 my $sortname = "Title";
28 if ((scalar @options > 2) && $options[2] =~ /^sort=(.*)$/i) {
29 $sortname = $1;
30 $sortname = undef if $sortname =~ /^nosort$/;
31 }
32
33 my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $options[0]);
34 if (!-e $subjectfile) {
35 my $collfile = $subjectfile;
36 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $options[0]);
37 if (!-e $subjectfile) {
38 die "Hierarchy Error: Can't locate subject file $options[0]\n" .
39 "This file should be in $collfile or $subjectfile\n";
40 }
41 }
42
43 return bless {
44 'descriptorlist'=>{}, # first field in subject file
45 'locatorlist'=>{}, # second field in subject file
46 'subjectfile' => $subjectfile,
47 'metaname' => $options[1],
48 'sortname' => $sortname
49 }, $class;
50}
51
52sub init {
53 my $self = shift (@_);
54
55 # read in the subject file
56 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
57
58 # $list is a hash that is indexed by the descriptor. The contents of this
59 # hash is a list of two items. The first item is the OID and the second item
60 # is the title
61 foreach $descriptor (keys (%$list)) {
62 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
63 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
64 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
65 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
66 }
67 }
68}
69
70sub classify {
71 my $self = shift (@_);
72 my ($doc_obj) = @_;
73
74 my $doc_OID = $doc_obj->get_OID();
75
76 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
77 $self->{'metaname'});
78
79 foreach $metaelement (@$metadata) {
80 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
81 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
82
83 my $sortmeta = "";
84 if (defined $self->{'sortname'}) {
85 if ($self->{'sortname'} =~ /^filename$/i) {
86 $sortmeta = $doc_obj->get_source_filename();
87 } else {
88 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
89 $self->{'sortname'});
90 if ($self-{'sortname'} eq "Creator") {
91 &sorttools::format_string_name_english ($sortmeta);
92 } else {
93 &sorttools::format_string_english ($sortmeta);
94 }
95 }
96 $sortmeta = "" unless defined $sortmeta;
97 }
98
99 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
100 [$doc_OID, $sortmeta]);
101 }
102 }
103}
104
105
106sub get_classify_info {
107 my $self = shift (@_);
108
109 my $list = $self->{'locatorlist'};
110
111 my $classifyinfo = $self->get_entry ($self->{'metaname'}, "Hierarchy");
112 foreach $OID (keys (%$list)) {
113 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "Hierarchy");
114
115 if (defined $self->{'sortname'}) {
116 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
117 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
118 }
119 } else {
120 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
121 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
122 }
123 }
124 }
125
126 return $classifyinfo;
127}
128
129
130sub get_OID_entry {
131 my $self = shift (@_);
132 my ($OID, $classifyinfo, $title, $classifytype) = @_;
133
134 $OID = "" unless defined $OID;
135 $OID =~ s/^\.+//;
136
137 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
138 $tailOID = "" unless defined $tailOID;
139
140
141 if (!defined $headOID) {
142 $classifyinfo->{'Title'} = $title;
143 $classifyinfo->{'classifytype'} = $classifytype;
144 return $classifyinfo;
145 }
146
147 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
148
149 # add entries to the contains list until we have one for headOID
150 while (scalar(@{$classifyinfo->{'contains'}}) < $headOID) {
151 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
152 }
153
154 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID-1)], $title, $classifytype);
155}
156
157sub get_entry {
158 my $self = shift (@_);
159 my ($title, $classifytype) = @_;
160
161 # organise into classification structure
162 my %classifyinfo = ('classifytype'=>$classifytype,
163 'Title'=>$title,
164 'contains'=>[]);
165
166 return \%classifyinfo;
167}
168
169
170
1711;
Note: See TracBrowser for help on using the repository browser.