1 | # classifier plugin for generating hierarchical classifications
|
---|
2 |
|
---|
3 | package Hierarchy;
|
---|
4 |
|
---|
5 | use util;
|
---|
6 | use cfgread;
|
---|
7 |
|
---|
8 | sub new {
|
---|
9 | my ($class, @options) = @_;
|
---|
10 |
|
---|
11 | if (!defined @options || (scalar @options < 2)) {
|
---|
12 | die "Error in options passed to Hierarchy classification\n" .
|
---|
13 | "Usage: Hierarchy subjectfile metadata\n";
|
---|
14 | }
|
---|
15 |
|
---|
16 | my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $options[0]);
|
---|
17 | if (!-e $subjectfile) {
|
---|
18 | my $collfile = $subjectfile;
|
---|
19 | $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $options[0]);
|
---|
20 | if (!-e $subjectfile) {
|
---|
21 | die "Error: Can't locate subject file $options[0]\n" .
|
---|
22 | "This file should be in $collfile or $subjectfile\n";
|
---|
23 | }
|
---|
24 | }
|
---|
25 |
|
---|
26 | return bless {
|
---|
27 | 'descriptorlist'=>{}, # first field in subject file
|
---|
28 | 'locatorlist'=>{}, # second field in subject file
|
---|
29 | 'subjectfile' => $subjectfile,
|
---|
30 | 'metaname' => $options[1]
|
---|
31 | }, $class;
|
---|
32 | }
|
---|
33 |
|
---|
34 | sub init {
|
---|
35 | my $self = shift (@_);
|
---|
36 |
|
---|
37 | # read in the subject file
|
---|
38 | my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
|
---|
39 |
|
---|
40 | # $list is a hash that is indexed by the descriptor. The contents of this
|
---|
41 | # hash is a list of two items. The first item is the OID and the second item
|
---|
42 | # is the title
|
---|
43 | foreach $descriptor (keys (%$list)) {
|
---|
44 | $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
|
---|
45 | unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
|
---|
46 | $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
|
---|
47 | $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
|
---|
48 | }
|
---|
49 | }
|
---|
50 | }
|
---|
51 |
|
---|
52 | sub classify {
|
---|
53 | my $self = shift (@_);
|
---|
54 | my ($doc_obj) = @_;
|
---|
55 |
|
---|
56 | my $doc_OID = $doc_obj->get_OID();
|
---|
57 | my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
|
---|
58 | $self->{'metaname'});
|
---|
59 |
|
---|
60 | foreach $metaelement (@$metadata) {
|
---|
61 | if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
|
---|
62 | (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
|
---|
63 | push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}}, $doc_OID);
|
---|
64 | }
|
---|
65 | }
|
---|
66 | }
|
---|
67 |
|
---|
68 |
|
---|
69 | sub get_classify_info {
|
---|
70 | my $self = shift (@_);
|
---|
71 |
|
---|
72 | my $list = $self->{'locatorlist'};
|
---|
73 |
|
---|
74 | my $classifyinfo = $self->get_entry ($self->{'metaname'}, "Hierarchy");
|
---|
75 | foreach $OID (keys (%$list)) {
|
---|
76 | my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "Hierarchy");
|
---|
77 | foreach $subOID (@{$list->{$OID}->{'contents'}}) {
|
---|
78 | push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID});
|
---|
79 | }
|
---|
80 | }
|
---|
81 |
|
---|
82 | return $classifyinfo;
|
---|
83 | }
|
---|
84 |
|
---|
85 |
|
---|
86 | sub get_OID_entry {
|
---|
87 | my $self = shift (@_);
|
---|
88 | my ($OID, $classifyinfo, $title, $classifytype) = @_;
|
---|
89 |
|
---|
90 | $OID = "" unless defined $OID;
|
---|
91 | $OID =~ s/^\.+//;
|
---|
92 |
|
---|
93 | my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
|
---|
94 | $tailOID = "" unless defined $tailOID;
|
---|
95 |
|
---|
96 |
|
---|
97 | if (!defined $headOID) {
|
---|
98 | $classifyinfo->{'Title'} = $title;
|
---|
99 | $classifyinfo->{'classifytype'} = $classifytype;
|
---|
100 | return $classifyinfo;
|
---|
101 | }
|
---|
102 |
|
---|
103 | $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
|
---|
104 |
|
---|
105 | # add entries to the contains list until we have one for headOID
|
---|
106 | while (scalar(@{$classifyinfo->{'contains'}}) < $headOID) {
|
---|
107 | push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
|
---|
108 | }
|
---|
109 |
|
---|
110 | return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID-1)], $title, $classifytype);
|
---|
111 | }
|
---|
112 |
|
---|
113 | sub get_entry {
|
---|
114 | my $self = shift (@_);
|
---|
115 | my ($title, $classifytype) = @_;
|
---|
116 |
|
---|
117 | # organise into classification structure
|
---|
118 | my %classifyinfo = ('classifytype'=>$classifytype,
|
---|
119 | 'Title'=>$title,
|
---|
120 | 'contains'=>[]);
|
---|
121 |
|
---|
122 | return \%classifyinfo;
|
---|
123 | }
|
---|
124 |
|
---|
125 | 1;
|
---|