source: trunk/gsdl/perllib/classify/Hierarchy.pm@ 316

Last change on this file since 316 was 316, checked in by sjboddie, 25 years ago
  • changed the way classifiers work
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1# classifier plugin for generating hierarchical classifications
2
3package Hierarchy;
4
5use util;
6use cfgread;
7
8sub new {
9 my ($class, @options) = @_;
10
11 if (!defined @options || (scalar @options < 2)) {
12 die "Error in options passed to Hierarchy classification\n" .
13 "Usage: Hierarchy subjectfile metadata\n";
14 }
15
16 my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $options[0]);
17 if (!-e $subjectfile) {
18 my $collfile = $subjectfile;
19 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $options[0]);
20 if (!-e $subjectfile) {
21 die "Error: Can't locate subject file $options[0]\n" .
22 "This file should be in $collfile or $subjectfile\n";
23 }
24 }
25
26 return bless {
27 'descriptorlist'=>{}, # first field in subject file
28 'locatorlist'=>{}, # second field in subject file
29 'subjectfile' => $subjectfile,
30 'metaname' => $options[1]
31 }, $class;
32}
33
34sub init {
35 my $self = shift (@_);
36
37 # read in the subject file
38 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
39
40 # $list is a hash that is indexed by the descriptor. The contents of this
41 # hash is a list of two items. The first item is the OID and the second item
42 # is the title
43 foreach $descriptor (keys (%$list)) {
44 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
45 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
46 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
47 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
48 }
49 }
50}
51
52sub classify {
53 my $self = shift (@_);
54 my ($doc_obj) = @_;
55
56 my $doc_OID = $doc_obj->get_OID();
57 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
58 $self->{'metaname'});
59
60 foreach $metaelement (@$metadata) {
61 if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
62 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
63 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}}, $doc_OID);
64 }
65 }
66}
67
68
69sub get_classify_info {
70 my $self = shift (@_);
71
72 my $list = $self->{'locatorlist'};
73
74 my $classifyinfo = $self->get_entry ($self->{'metaname'}, "Hierarchy");
75 foreach $OID (keys (%$list)) {
76 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "Hierarchy");
77 foreach $subOID (@{$list->{$OID}->{'contents'}}) {
78 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID});
79 }
80 }
81
82 return $classifyinfo;
83}
84
85
86sub get_OID_entry {
87 my $self = shift (@_);
88 my ($OID, $classifyinfo, $title, $classifytype) = @_;
89
90 $OID = "" unless defined $OID;
91 $OID =~ s/^\.+//;
92
93 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
94 $tailOID = "" unless defined $tailOID;
95
96
97 if (!defined $headOID) {
98 $classifyinfo->{'Title'} = $title;
99 $classifyinfo->{'classifytype'} = $classifytype;
100 return $classifyinfo;
101 }
102
103 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
104
105 # add entries to the contains list until we have one for headOID
106 while (scalar(@{$classifyinfo->{'contains'}}) < $headOID) {
107 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
108 }
109
110 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID-1)], $title, $classifytype);
111}
112
113sub get_entry {
114 my $self = shift (@_);
115 my ($title, $classifytype) = @_;
116
117 # organise into classification structure
118 my %classifyinfo = ('classifytype'=>$classifytype,
119 'Title'=>$title,
120 'contains'=>[]);
121
122 return \%classifyinfo;
123}
124
1251;
Note: See TracBrowser for help on using the repository browser.