1 | # classifier plugin for generating hierarchical classifications
|
---|
2 |
|
---|
3 | package Hierarchy;
|
---|
4 |
|
---|
5 | use util;
|
---|
6 | use cfgread;
|
---|
7 |
|
---|
8 | sub new {
|
---|
9 | my ($class, @options) = @_;
|
---|
10 |
|
---|
11 | if (!defined @options || (scalar @options < 2)) {
|
---|
12 | die "Error in options passed to Hierarchy classification\n" .
|
---|
13 | "Usage: Hierarchy subjectfile metadata\n";
|
---|
14 | }
|
---|
15 |
|
---|
16 | my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $options[0]);
|
---|
17 | if (!-e $subjectfile) {
|
---|
18 | my $collfile = $subjectfile;
|
---|
19 | $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $options[0]);
|
---|
20 | if (!-e $subjectfile) {
|
---|
21 | die "Error: Can't locate subject file $options[0]\n" .
|
---|
22 | "This file should be in $collfile or $subjectfile\n";
|
---|
23 | }
|
---|
24 | }
|
---|
25 |
|
---|
26 | return bless {
|
---|
27 | 'OID'=>"NULL",
|
---|
28 | 'descriptorlist'=>{},
|
---|
29 | 'locatorlist'=>{},
|
---|
30 | 'subjectfile' => $subjectfile,
|
---|
31 | 'metaname' => $options[1]
|
---|
32 | }, $class;
|
---|
33 | }
|
---|
34 |
|
---|
35 | sub init {
|
---|
36 | my $self = shift (@_);
|
---|
37 | my $contenthash = {};
|
---|
38 |
|
---|
39 | # read in the subject file
|
---|
40 | my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
|
---|
41 |
|
---|
42 | foreach $descriptor (keys (%$list)) {
|
---|
43 | $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
|
---|
44 | unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
|
---|
45 | $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
|
---|
46 | $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
|
---|
47 | }
|
---|
48 |
|
---|
49 | my $OID = $list->{$descriptor}->[0];
|
---|
50 | if ($OID =~ s/\.(\d+)$//) {
|
---|
51 | # $OID is now the parents OID
|
---|
52 | my $child = $1;
|
---|
53 | $contenthash->{$OID} = [] unless defined $contenthash->{$OID};
|
---|
54 | push (@{$contenthash->{$OID}}, $child);
|
---|
55 | }
|
---|
56 | }
|
---|
57 |
|
---|
58 | # need to shove each OID into the contents of its parent
|
---|
59 | # do this separately in case classifications aren't in order in
|
---|
60 | # subject file
|
---|
61 | foreach $OID (keys (%$contenthash)) {
|
---|
62 | push (@{$self->{'locatorlist'}->{$OID}->{'contents'}},
|
---|
63 | sort numerically @{$contenthash->{$OID}});
|
---|
64 | map { $_ = "\".$_"; } @{$self->{'locatorlist'}->{$OID}->{'contents'}};
|
---|
65 | }
|
---|
66 | }
|
---|
67 |
|
---|
68 | sub set_OID {
|
---|
69 | my $self = shift (@_);
|
---|
70 | my ($OID) = @_;
|
---|
71 |
|
---|
72 | $self->{'OID'} = $OID;
|
---|
73 | }
|
---|
74 |
|
---|
75 | sub get_OID {
|
---|
76 | my $self = shift (@_);
|
---|
77 | return $self->{'OID'};
|
---|
78 | }
|
---|
79 |
|
---|
80 | sub classify {
|
---|
81 | my $self = shift (@_);
|
---|
82 | my ($doc_obj) = @_;
|
---|
83 |
|
---|
84 | my $doc_OID = $doc_obj->get_OID();
|
---|
85 | my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
|
---|
86 | $self->{'metaname'});
|
---|
87 |
|
---|
88 | foreach $metaelement (@$metadata) {
|
---|
89 | if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
|
---|
90 | (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
|
---|
91 | push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}}, $doc_OID);
|
---|
92 | }
|
---|
93 | }
|
---|
94 | }
|
---|
95 |
|
---|
96 | sub output_classify_info {
|
---|
97 | my $self = shift (@_);
|
---|
98 | my ($handle) = @_;
|
---|
99 | my $topcontents = [];
|
---|
100 |
|
---|
101 | my $list = $self->{'locatorlist'};
|
---|
102 |
|
---|
103 | foreach $OID (keys (%$list)) {
|
---|
104 | $self->output_entry ($handle, "$self->{'OID'}.$OID",
|
---|
105 | $list->{$OID}->{'title'}, $list->{$OID}->{'contents'});
|
---|
106 | push (@$topcontents, $OID) if $OID !~ /\./;
|
---|
107 | }
|
---|
108 |
|
---|
109 | # top level of classification
|
---|
110 | map { $_ = "\".$_"; } sort numerically @$topcontents;
|
---|
111 | $self->output_entry ($handle, $self->{'OID'}, $self->{'metaname'},
|
---|
112 | $topcontents, "Hierarchy");
|
---|
113 | }
|
---|
114 |
|
---|
115 | sub output_entry {
|
---|
116 | my $self = shift (@_);
|
---|
117 | my ($handle, $OID, $title, $contentsref, $classifytype) = @_;
|
---|
118 |
|
---|
119 | print $handle "[$OID]\n";
|
---|
120 | print $handle "<doctype>classify\n";
|
---|
121 | print $handle "<hastxt>0\n";
|
---|
122 | print $handle "<Title>$title\n";
|
---|
123 | print $handle "<classifytype>$classifytype\n" if defined $classifytype;
|
---|
124 | if (scalar @$contentsref) {
|
---|
125 | print $handle "<contains>", join (";", @$contentsref), "\n";
|
---|
126 | }
|
---|
127 | print $handle '-' x 70, "\n";
|
---|
128 | }
|
---|
129 |
|
---|
130 | sub numerically { $a <=> $b; }
|
---|
131 |
|
---|
132 | 1;
|
---|