# simple list classifier plugin # collect.cfg usage: 'List metaelement' or simply 'List' # the first will create a single list of all documents # with metaelement metadata (sorted alphabetically by whichever # metadata element was chosen). # The second will create a single list of all documents # which won't be sorted at all - they may well be in # some completely random order ;-| package List; sub new { my ($class, @options) = @_; my $list = []; if (defined $options[0]) { $list = {}; } return bless { 'list'=>$list, 'metaname' => $options[0] }, $class; } sub init { my $self = shift (@_); if (defined $self->{'metaname'}) { $self->{'list'} = {}; } else { $self->{'list'} = []; } } sub classify { my $self = shift (@_); my ($doc_obj) = @_; my $doc_OID = $doc_obj->get_OID(); if (defined $self->{'metaname'}) { my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'metaname'}); if (defined $metavalue) { if ($self->{'metaname'} eq 'Creator') { $self->format_string_name_english (\$metavalue); } else { $self->format_string_english (\$metavalue); } if (defined $self->{'list'}->{$doc_OID}) { print STDERR "WARNING: List::classify called multiple times for $doc_OID\n"; } $self->{'list'}->{$doc_OID} = $metavalue; } } else { push (@{$self->{'list'}}, $doc_OID); } } sub get_classify_info { my $self = shift (@_); my $classifytitle = "List"; my $contains = ""; my @list = (); if (defined $self->{'metaname'}) { $classifytitle = $self->{'metaname'}; if (keys %{$self->{'list'}}) { @list = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}}; } } else { @list = @{$self->{'list'}}; } # organise into classification structure my %classifyinfo = ('classifytype'=>'List', 'Title'=>$classifytitle, 'contains'=>[]); foreach $OID (@list) { push (@{$classifyinfo{'contains'}}, {'OID'=>$OID}); } return \%classifyinfo; } # format an english name for sorting # i.e. convert to lowercase, put surname before # first names etc. sub format_string_name_english { my $self = shift (@_); my ($stringref) = @_; $$stringref =~ tr/A-Z/a-z/; $$stringref =~ s/[^a-z0-9 ]//g; $$stringref =~ s/\s+/ /g; $$stringref =~ s/^\s+//; my @names = split / /, $$stringref; my $surname = pop @names; while (scalar @names && $surname =~ /^(jnr|snr)$/i) { $surname = pop @names; } $$stringref = $surname . " " . $$stringref; } # format an english string for sorting # i.e. convert to lowercase, remove the, a or an # from beginning of string etc. sub format_string_english { my $self = shift (@_); my ($stringref) = @_; $$stringref =~ tr/A-Z/a-z/; $$stringref =~ s/[^a-z0-9 ]//g; $$stringref =~ s/^\s*(the|a|an)\b//; $$stringref =~ s/^\s+//; } 1;