source: trunk/gsdl/perllib/classify/List.pm@ 316

Last change on this file since 316 was 316, checked in by sjboddie, 25 years ago
  • changed the way classifiers work
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 2.9 KB
Line 
1# simple list classifier plugin
2# collect.cfg usage: 'List metaelement' or simply 'List'
3# the first will create a single list of all documents
4# with metaelement metadata (sorted alphabetically by whichever
5# metadata element was chosen).
6# The second will create a single list of all documents
7# which won't be sorted at all - they may well be in
8# some completely random order ;-|
9
10
11package List;
12
13sub new {
14 my ($class, @options) = @_;
15
16 my $list = [];
17 if (defined $options[0]) {
18 $list = {};
19 }
20
21 return bless {
22 'list'=>$list,
23 'metaname' => $options[0]
24 }, $class;
25}
26
27sub init {
28 my $self = shift (@_);
29
30 if (defined $self->{'metaname'}) {
31 $self->{'list'} = {};
32 } else {
33 $self->{'list'} = [];
34 }
35}
36
37sub classify {
38 my $self = shift (@_);
39 my ($doc_obj) = @_;
40
41 my $doc_OID = $doc_obj->get_OID();
42
43 if (defined $self->{'metaname'}) {
44 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
45 $self->{'metaname'});
46 if (defined $metavalue) {
47 if ($self->{'metaname'} eq 'Creator') {
48 $self->format_string_name_english (\$metavalue);
49 } else {
50 $self->format_string_english (\$metavalue);
51 }
52 if (defined $self->{'list'}->{$doc_OID}) {
53 print STDERR "WARNING: List::classify called multiple times for $doc_OID\n";
54 }
55 $self->{'list'}->{$doc_OID} = $metavalue;
56 }
57 } else {
58 push (@{$self->{'list'}}, $doc_OID);
59 }
60}
61
62sub get_classify_info {
63 my $self = shift (@_);
64
65 my $classifytitle = "List";
66 my $contains = "";
67 my @list = ();
68 if (defined $self->{'metaname'}) {
69 $classifytitle = $self->{'metaname'};
70 if (keys %{$self->{'list'}}) {
71 @list = sort {$self->{'list'}->{$a}
72 cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
73 }
74 } else {
75 @list = @{$self->{'list'}};
76 }
77
78 # organise into classification structure
79 my %classifyinfo = ('classifytype'=>'List',
80 'Title'=>$classifytitle,
81 'contains'=>[]);
82 foreach $OID (@list) {
83 push (@{$classifyinfo{'contains'}}, {'OID'=>$OID});
84 }
85
86 return \%classifyinfo;
87}
88
89
90# format an english name for sorting
91# i.e. convert to lowercase, put surname before
92# first names etc.
93sub format_string_name_english {
94 my $self = shift (@_);
95 my ($stringref) = @_;
96
97 $$stringref =~ tr/A-Z/a-z/;
98 $$stringref =~ s/[^a-z0-9 ]//g;
99 $$stringref =~ s/\s+/ /g;
100 $$stringref =~ s/^\s+//;
101 my @names = split / /, $$stringref;
102 my $surname = pop @names;
103 while (scalar @names && $surname =~ /^(jnr|snr)$/i) {
104 $surname = pop @names;
105 }
106 $$stringref = $surname . " " . $$stringref;
107}
108
109# format an english string for sorting
110# i.e. convert to lowercase, remove the, a or an
111# from beginning of string etc.
112sub format_string_english {
113 my $self = shift (@_);
114 my ($stringref) = @_;
115
116 $$stringref =~ tr/A-Z/a-z/;
117 $$stringref =~ s/[^a-z0-9 ]//g;
118 $$stringref =~ s/^\s*(the|a|an)\b//;
119 $$stringref =~ s/^\s+//;
120}
121
122
1231;
Note: See TracBrowser for help on using the repository browser.