1 | # simple list classifier plugin
|
---|
2 | # collect.cfg usage: 'List metaelement' or simply 'List'
|
---|
3 | # the first will create a single list of all documents
|
---|
4 | # with metaelement metadata (sorted alphabetically by whichever
|
---|
5 | # metadata element was chosen).
|
---|
6 | # The second will create a single list of all documents
|
---|
7 | # which won't be sorted at all - they may well be in
|
---|
8 | # some completely random order ;-|
|
---|
9 |
|
---|
10 |
|
---|
11 | package List;
|
---|
12 |
|
---|
13 | sub new {
|
---|
14 | my ($class, @options) = @_;
|
---|
15 |
|
---|
16 | my $list = [];
|
---|
17 | if (defined $options[0]) {
|
---|
18 | $list = {};
|
---|
19 | }
|
---|
20 |
|
---|
21 | return bless {
|
---|
22 | 'list'=>$list,
|
---|
23 | 'metaname' => $options[0]
|
---|
24 | }, $class;
|
---|
25 | }
|
---|
26 |
|
---|
27 | sub init {
|
---|
28 | my $self = shift (@_);
|
---|
29 |
|
---|
30 | if (defined $self->{'metaname'}) {
|
---|
31 | $self->{'list'} = {};
|
---|
32 | } else {
|
---|
33 | $self->{'list'} = [];
|
---|
34 | }
|
---|
35 | }
|
---|
36 |
|
---|
37 | sub classify {
|
---|
38 | my $self = shift (@_);
|
---|
39 | my ($doc_obj) = @_;
|
---|
40 |
|
---|
41 | my $doc_OID = $doc_obj->get_OID();
|
---|
42 |
|
---|
43 | if (defined $self->{'metaname'}) {
|
---|
44 | my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
|
---|
45 | $self->{'metaname'});
|
---|
46 | if (defined $metavalue) {
|
---|
47 | if ($self->{'metaname'} eq 'Creator') {
|
---|
48 | $self->format_string_name_english (\$metavalue);
|
---|
49 | } else {
|
---|
50 | $self->format_string_english (\$metavalue);
|
---|
51 | }
|
---|
52 | if (defined $self->{'list'}->{$doc_OID}) {
|
---|
53 | print STDERR "WARNING: List::classify called multiple times for $doc_OID\n";
|
---|
54 | }
|
---|
55 | $self->{'list'}->{$doc_OID} = $metavalue;
|
---|
56 | }
|
---|
57 | } else {
|
---|
58 | push (@{$self->{'list'}}, $doc_OID);
|
---|
59 | }
|
---|
60 | }
|
---|
61 |
|
---|
62 | sub get_classify_info {
|
---|
63 | my $self = shift (@_);
|
---|
64 |
|
---|
65 | my $classifytitle = "List";
|
---|
66 | my $contains = "";
|
---|
67 | my @list = ();
|
---|
68 | if (defined $self->{'metaname'}) {
|
---|
69 | $classifytitle = $self->{'metaname'};
|
---|
70 | if (keys %{$self->{'list'}}) {
|
---|
71 | @list = sort {$self->{'list'}->{$a}
|
---|
72 | cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
|
---|
73 | }
|
---|
74 | } else {
|
---|
75 | @list = @{$self->{'list'}};
|
---|
76 | }
|
---|
77 |
|
---|
78 | # organise into classification structure
|
---|
79 | my %classifyinfo = ('classifytype'=>'List',
|
---|
80 | 'Title'=>$classifytitle,
|
---|
81 | 'contains'=>[]);
|
---|
82 | foreach $OID (@list) {
|
---|
83 | push (@{$classifyinfo{'contains'}}, {'OID'=>$OID});
|
---|
84 | }
|
---|
85 |
|
---|
86 | return \%classifyinfo;
|
---|
87 | }
|
---|
88 |
|
---|
89 |
|
---|
90 | # format an english name for sorting
|
---|
91 | # i.e. convert to lowercase, put surname before
|
---|
92 | # first names etc.
|
---|
93 | sub format_string_name_english {
|
---|
94 | my $self = shift (@_);
|
---|
95 | my ($stringref) = @_;
|
---|
96 |
|
---|
97 | $$stringref =~ tr/A-Z/a-z/;
|
---|
98 | $$stringref =~ s/[^a-z0-9 ]//g;
|
---|
99 | $$stringref =~ s/\s+/ /g;
|
---|
100 | $$stringref =~ s/^\s+//;
|
---|
101 | my @names = split / /, $$stringref;
|
---|
102 | my $surname = pop @names;
|
---|
103 | while (scalar @names && $surname =~ /^(jnr|snr)$/i) {
|
---|
104 | $surname = pop @names;
|
---|
105 | }
|
---|
106 | $$stringref = $surname . " " . $$stringref;
|
---|
107 | }
|
---|
108 |
|
---|
109 | # format an english string for sorting
|
---|
110 | # i.e. convert to lowercase, remove the, a or an
|
---|
111 | # from beginning of string etc.
|
---|
112 | sub format_string_english {
|
---|
113 | my $self = shift (@_);
|
---|
114 | my ($stringref) = @_;
|
---|
115 |
|
---|
116 | $$stringref =~ tr/A-Z/a-z/;
|
---|
117 | $$stringref =~ s/[^a-z0-9 ]//g;
|
---|
118 | $$stringref =~ s/^\s*(the|a|an)\b//;
|
---|
119 | $$stringref =~ s/^\s+//;
|
---|
120 | }
|
---|
121 |
|
---|
122 |
|
---|
123 | 1;
|
---|