[287] | 1 | # simple list classifier plugin
|
---|
| 2 | # collect.cfg usage: 'List metaelement' or simply 'List'
|
---|
| 3 | # the first will create a single list of all documents
|
---|
| 4 | # with metaelement metadata (sorted alphabetically by whichever
|
---|
| 5 | # metadata element was chosen).
|
---|
| 6 | # The second will create a single list of all documents
|
---|
| 7 | # which won't be sorted at all - they may well be in
|
---|
| 8 | # some completely random order ;-|
|
---|
| 9 |
|
---|
| 10 |
|
---|
| 11 | package List;
|
---|
| 12 |
|
---|
| 13 | sub new {
|
---|
| 14 | my ($class, @options) = @_;
|
---|
| 15 |
|
---|
| 16 | my $list = [];
|
---|
| 17 | if (defined $options[0]) {
|
---|
| 18 | $list = {};
|
---|
| 19 | }
|
---|
| 20 |
|
---|
| 21 | return bless {
|
---|
| 22 | 'OID'=>"NULL",
|
---|
| 23 | 'list'=>$list,
|
---|
| 24 | 'metaname' => $options[0]
|
---|
| 25 | }, $class;
|
---|
| 26 | }
|
---|
| 27 |
|
---|
| 28 | sub init {
|
---|
| 29 | my $self = shift (@_);
|
---|
| 30 |
|
---|
| 31 | if (defined $self->{'metaname'}) {
|
---|
| 32 | $self->{'list'} = {};
|
---|
| 33 | } else {
|
---|
| 34 | $self->{'list'} = [];
|
---|
| 35 | }
|
---|
| 36 | }
|
---|
| 37 |
|
---|
| 38 | sub set_OID {
|
---|
| 39 | my $self = shift (@_);
|
---|
| 40 | my ($OID) = @_;
|
---|
| 41 |
|
---|
| 42 | $self->{'OID'} = $OID;
|
---|
| 43 | }
|
---|
| 44 |
|
---|
| 45 | sub get_OID {
|
---|
| 46 | my $self = shift (@_);
|
---|
| 47 | return $self->{'OID'};
|
---|
| 48 | }
|
---|
| 49 |
|
---|
| 50 | sub classify {
|
---|
| 51 | my $self = shift (@_);
|
---|
| 52 | my ($doc_obj) = @_;
|
---|
| 53 |
|
---|
| 54 | my $doc_OID = $doc_obj->get_OID();
|
---|
| 55 |
|
---|
| 56 | if (defined $self->{'metaname'}) {
|
---|
| 57 | my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
|
---|
| 58 | $self->{'metaname'});
|
---|
| 59 | if (defined $metavalue) {
|
---|
| 60 | if ($self->{'metaname'} eq 'Creator') {
|
---|
| 61 | $self->format_string_name_english (\$metavalue);
|
---|
| 62 | } else {
|
---|
| 63 | $self->format_string_english (\$metavalue);
|
---|
| 64 | }
|
---|
| 65 | if (defined $self->{'list'}->{$doc_OID}) {
|
---|
| 66 | print STDERR "WARNING: List::classify called multiple times for $doc_OID\n";
|
---|
| 67 | }
|
---|
| 68 | $self->{'list'}->{$doc_OID} = $metavalue;
|
---|
| 69 | }
|
---|
| 70 | } else {
|
---|
| 71 | push (@{$self->{'list'}}, $doc_OID);
|
---|
| 72 | }
|
---|
| 73 | }
|
---|
| 74 |
|
---|
| 75 | sub output_classify_info {
|
---|
| 76 | my $self = shift (@_);
|
---|
| 77 | my ($handle) = @_;
|
---|
| 78 |
|
---|
| 79 | my $classifytitle = "List";
|
---|
| 80 | my $contains = "";
|
---|
| 81 | if (defined $self->{'metaname'}) {
|
---|
| 82 | $classifytitle = $self->{'metaname'};
|
---|
| 83 | if (keys %{$self->{'list'}}) {
|
---|
| 84 | $contains = join (";", sort {$self->{'list'}->{$a}
|
---|
| 85 | cmp $self->{'list'}->{$b};} keys %{$self->{'list'}}) . "\n";
|
---|
| 86 | }
|
---|
| 87 | } else {
|
---|
| 88 | if (scalar (@{$self->{'list'}}) > 0) {
|
---|
| 89 | $contains = join (";", @{$self->{'list'}}) . "\n";
|
---|
| 90 | }
|
---|
| 91 | }
|
---|
| 92 |
|
---|
| 93 | print $handle "[$self->{'OID'}]\n";
|
---|
| 94 | print $handle "<doctype>classify\n";
|
---|
| 95 | print $handle "<hastxt>0\n";
|
---|
| 96 | print $handle "<classifytype>List\n";
|
---|
| 97 | print $handle "<Title>$classifytitle\n";
|
---|
| 98 | print $handle "<contains>$contains\n" if $contains =~ /\w/;
|
---|
| 99 | print $handle '-' x 70, "\n";
|
---|
| 100 | }
|
---|
| 101 |
|
---|
| 102 |
|
---|
| 103 | # format an english name for sorting
|
---|
| 104 | # i.e. convert to lowercase, put surname before
|
---|
| 105 | # first names etc.
|
---|
| 106 | sub format_string_name_english {
|
---|
| 107 | my $self = shift (@_);
|
---|
| 108 | my ($stringref) = @_;
|
---|
| 109 |
|
---|
| 110 | $$stringref =~ tr/A-Z/a-z/;
|
---|
| 111 | $$stringref =~ s/[^a-z0-9 ]//g;
|
---|
| 112 | $$stringref =~ s/\s+/ /g;
|
---|
| 113 | $$stringref =~ s/^\s+//;
|
---|
| 114 | my @names = split / /, $$stringref;
|
---|
| 115 | my $surname = pop @names;
|
---|
| 116 | while (scalar @names && $surname =~ /^(jnr|snr)$/i) {
|
---|
| 117 | $surname = pop @names;
|
---|
| 118 | }
|
---|
| 119 | $$stringref = $surname . " " . $$stringref;
|
---|
| 120 | }
|
---|
| 121 |
|
---|
| 122 | # format an english string for sorting
|
---|
| 123 | # i.e. convert to lowercase, remove the, a or an
|
---|
| 124 | # from beginning of string etc.
|
---|
| 125 | sub format_string_english {
|
---|
| 126 | my $self = shift (@_);
|
---|
| 127 | my ($stringref) = @_;
|
---|
| 128 |
|
---|
| 129 | $$stringref =~ tr/A-Z/a-z/;
|
---|
| 130 | $$stringref =~ s/[^a-z0-9 ]//g;
|
---|
| 131 | $$stringref =~ s/^\s*(the|a|an)\b//;
|
---|
| 132 | $$stringref =~ s/^\s+//;
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 |
|
---|
| 136 | 1;
|
---|