source: trunk/gsdl/perllib/classify/List.pm@ 287

Last change on this file since 287 was 287, checked in by sjboddie, 25 years ago

list classifier became List. Now does a bit more than it used to

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.2 KB
Line 
1# simple list classifier plugin
2# collect.cfg usage: 'List metaelement' or simply 'List'
3# the first will create a single list of all documents
4# with metaelement metadata (sorted alphabetically by whichever
5# metadata element was chosen).
6# The second will create a single list of all documents
7# which won't be sorted at all - they may well be in
8# some completely random order ;-|
9
10
11package List;
12
13sub new {
14 my ($class, @options) = @_;
15
16 my $list = [];
17 if (defined $options[0]) {
18 $list = {};
19 }
20
21 return bless {
22 'OID'=>"NULL",
23 'list'=>$list,
24 'metaname' => $options[0]
25 }, $class;
26}
27
28sub init {
29 my $self = shift (@_);
30
31 if (defined $self->{'metaname'}) {
32 $self->{'list'} = {};
33 } else {
34 $self->{'list'} = [];
35 }
36}
37
38sub set_OID {
39 my $self = shift (@_);
40 my ($OID) = @_;
41
42 $self->{'OID'} = $OID;
43}
44
45sub get_OID {
46 my $self = shift (@_);
47 return $self->{'OID'};
48}
49
50sub classify {
51 my $self = shift (@_);
52 my ($doc_obj) = @_;
53
54 my $doc_OID = $doc_obj->get_OID();
55
56 if (defined $self->{'metaname'}) {
57 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
58 $self->{'metaname'});
59 if (defined $metavalue) {
60 if ($self->{'metaname'} eq 'Creator') {
61 $self->format_string_name_english (\$metavalue);
62 } else {
63 $self->format_string_english (\$metavalue);
64 }
65 if (defined $self->{'list'}->{$doc_OID}) {
66 print STDERR "WARNING: List::classify called multiple times for $doc_OID\n";
67 }
68 $self->{'list'}->{$doc_OID} = $metavalue;
69 }
70 } else {
71 push (@{$self->{'list'}}, $doc_OID);
72 }
73}
74
75sub output_classify_info {
76 my $self = shift (@_);
77 my ($handle) = @_;
78
79 my $classifytitle = "List";
80 my $contains = "";
81 if (defined $self->{'metaname'}) {
82 $classifytitle = $self->{'metaname'};
83 if (keys %{$self->{'list'}}) {
84 $contains = join (";", sort {$self->{'list'}->{$a}
85 cmp $self->{'list'}->{$b};} keys %{$self->{'list'}}) . "\n";
86 }
87 } else {
88 if (scalar (@{$self->{'list'}}) > 0) {
89 $contains = join (";", @{$self->{'list'}}) . "\n";
90 }
91 }
92
93 print $handle "[$self->{'OID'}]\n";
94 print $handle "<doctype>classify\n";
95 print $handle "<hastxt>0\n";
96 print $handle "<classifytype>List\n";
97 print $handle "<Title>$classifytitle\n";
98 print $handle "<contains>$contains\n" if $contains =~ /\w/;
99 print $handle '-' x 70, "\n";
100}
101
102
103# format an english name for sorting
104# i.e. convert to lowercase, put surname before
105# first names etc.
106sub format_string_name_english {
107 my $self = shift (@_);
108 my ($stringref) = @_;
109
110 $$stringref =~ tr/A-Z/a-z/;
111 $$stringref =~ s/[^a-z0-9 ]//g;
112 $$stringref =~ s/\s+/ /g;
113 $$stringref =~ s/^\s+//;
114 my @names = split / /, $$stringref;
115 my $surname = pop @names;
116 while (scalar @names && $surname =~ /^(jnr|snr)$/i) {
117 $surname = pop @names;
118 }
119 $$stringref = $surname . " " . $$stringref;
120}
121
122# format an english string for sorting
123# i.e. convert to lowercase, remove the, a or an
124# from beginning of string etc.
125sub format_string_english {
126 my $self = shift (@_);
127 my ($stringref) = @_;
128
129 $$stringref =~ tr/A-Z/a-z/;
130 $$stringref =~ s/[^a-z0-9 ]//g;
131 $$stringref =~ s/^\s*(the|a|an)\b//;
132 $$stringref =~ s/^\s+//;
133}
134
135
1361;
Note: See TracBrowser for help on using the repository browser.