[287] | 1 | # simple list classifier plugin
|
---|
| 2 | # collect.cfg usage: 'List metaelement' or simply 'List'
|
---|
| 3 | # the first will create a single list of all documents
|
---|
| 4 | # with metaelement metadata (sorted alphabetically by whichever
|
---|
| 5 | # metadata element was chosen).
|
---|
| 6 | # The second will create a single list of all documents
|
---|
| 7 | # which won't be sorted at all - they may well be in
|
---|
| 8 | # some completely random order ;-|
|
---|
| 9 |
|
---|
| 10 |
|
---|
| 11 | package List;
|
---|
| 12 |
|
---|
| 13 | sub new {
|
---|
| 14 | my ($class, @options) = @_;
|
---|
| 15 |
|
---|
| 16 | my $list = [];
|
---|
| 17 | if (defined $options[0]) {
|
---|
| 18 | $list = {};
|
---|
| 19 | }
|
---|
| 20 |
|
---|
| 21 | return bless {
|
---|
| 22 | 'list'=>$list,
|
---|
| 23 | 'metaname' => $options[0]
|
---|
| 24 | }, $class;
|
---|
| 25 | }
|
---|
| 26 |
|
---|
| 27 | sub init {
|
---|
| 28 | my $self = shift (@_);
|
---|
| 29 |
|
---|
| 30 | if (defined $self->{'metaname'}) {
|
---|
| 31 | $self->{'list'} = {};
|
---|
| 32 | } else {
|
---|
| 33 | $self->{'list'} = [];
|
---|
| 34 | }
|
---|
| 35 | }
|
---|
| 36 |
|
---|
| 37 | sub classify {
|
---|
| 38 | my $self = shift (@_);
|
---|
| 39 | my ($doc_obj) = @_;
|
---|
| 40 |
|
---|
| 41 | my $doc_OID = $doc_obj->get_OID();
|
---|
| 42 |
|
---|
| 43 | if (defined $self->{'metaname'}) {
|
---|
| 44 | my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
|
---|
| 45 | $self->{'metaname'});
|
---|
| 46 | if (defined $metavalue) {
|
---|
| 47 | if ($self->{'metaname'} eq 'Creator') {
|
---|
| 48 | $self->format_string_name_english (\$metavalue);
|
---|
| 49 | } else {
|
---|
| 50 | $self->format_string_english (\$metavalue);
|
---|
| 51 | }
|
---|
| 52 | if (defined $self->{'list'}->{$doc_OID}) {
|
---|
| 53 | print STDERR "WARNING: List::classify called multiple times for $doc_OID\n";
|
---|
| 54 | }
|
---|
| 55 | $self->{'list'}->{$doc_OID} = $metavalue;
|
---|
| 56 | }
|
---|
| 57 | } else {
|
---|
| 58 | push (@{$self->{'list'}}, $doc_OID);
|
---|
| 59 | }
|
---|
| 60 | }
|
---|
| 61 |
|
---|
[316] | 62 | sub get_classify_info {
|
---|
[287] | 63 | my $self = shift (@_);
|
---|
| 64 |
|
---|
| 65 | my $classifytitle = "List";
|
---|
| 66 | my $contains = "";
|
---|
[316] | 67 | my @list = ();
|
---|
[287] | 68 | if (defined $self->{'metaname'}) {
|
---|
| 69 | $classifytitle = $self->{'metaname'};
|
---|
| 70 | if (keys %{$self->{'list'}}) {
|
---|
[316] | 71 | @list = sort {$self->{'list'}->{$a}
|
---|
| 72 | cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
|
---|
[287] | 73 | }
|
---|
| 74 | } else {
|
---|
[316] | 75 | @list = @{$self->{'list'}};
|
---|
[287] | 76 | }
|
---|
| 77 |
|
---|
[316] | 78 | # organise into classification structure
|
---|
| 79 | my %classifyinfo = ('classifytype'=>'List',
|
---|
| 80 | 'Title'=>$classifytitle,
|
---|
| 81 | 'contains'=>[]);
|
---|
| 82 | foreach $OID (@list) {
|
---|
| 83 | push (@{$classifyinfo{'contains'}}, {'OID'=>$OID});
|
---|
| 84 | }
|
---|
| 85 |
|
---|
| 86 | return \%classifyinfo;
|
---|
[287] | 87 | }
|
---|
| 88 |
|
---|
| 89 |
|
---|
| 90 | # format an english name for sorting
|
---|
| 91 | # i.e. convert to lowercase, put surname before
|
---|
| 92 | # first names etc.
|
---|
| 93 | sub format_string_name_english {
|
---|
| 94 | my $self = shift (@_);
|
---|
| 95 | my ($stringref) = @_;
|
---|
| 96 |
|
---|
| 97 | $$stringref =~ tr/A-Z/a-z/;
|
---|
| 98 | $$stringref =~ s/[^a-z0-9 ]//g;
|
---|
| 99 | $$stringref =~ s/\s+/ /g;
|
---|
| 100 | $$stringref =~ s/^\s+//;
|
---|
| 101 | my @names = split / /, $$stringref;
|
---|
| 102 | my $surname = pop @names;
|
---|
| 103 | while (scalar @names && $surname =~ /^(jnr|snr)$/i) {
|
---|
| 104 | $surname = pop @names;
|
---|
| 105 | }
|
---|
| 106 | $$stringref = $surname . " " . $$stringref;
|
---|
| 107 | }
|
---|
| 108 |
|
---|
| 109 | # format an english string for sorting
|
---|
| 110 | # i.e. convert to lowercase, remove the, a or an
|
---|
| 111 | # from beginning of string etc.
|
---|
| 112 | sub format_string_english {
|
---|
| 113 | my $self = shift (@_);
|
---|
| 114 | my ($stringref) = @_;
|
---|
| 115 |
|
---|
| 116 | $$stringref =~ tr/A-Z/a-z/;
|
---|
| 117 | $$stringref =~ s/[^a-z0-9 ]//g;
|
---|
| 118 | $$stringref =~ s/^\s*(the|a|an)\b//;
|
---|
| 119 | $$stringref =~ s/^\s+//;
|
---|
| 120 | }
|
---|
| 121 |
|
---|
| 122 |
|
---|
| 123 | 1;
|
---|