Changeset 16011


Ignore:
Timestamp:
2008-06-16T11:14:30+12:00 (16 years ago)
Author:
kjdon
Message:

moved the -first option to here from ReadTextFile

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/AutoExtractMetadata.pm

    r15919 r16011  
    4545}
    4646
    47 my $arguments = [];
     47my $arguments = [
     48         {'name' => "first",
     49          'desc' => "{AutoExtractMetadata.first}",
     50          'type' => "string",
     51          'reqd' => "no" }
     52         ];
    4853
    4954
     
    96101}
    97102
    98 # here is where we call methods from the supporting plugins - gis and textextract
     103# here is where we call methods from the supporting extractor plugins
    99104sub auto_extract_metadata {
    100105    my $self = shift(@_);
    101106    my ($doc_obj) = @_;
    102107
     108    if ($self->{'first'}) {
     109    my $thissection = $doc_obj->get_top_section();
     110    while (defined $thissection) {
     111        my $text = $doc_obj->get_text($thissection);
     112        $self->extract_first_NNNN_characters (\$text, $doc_obj, $thissection) if $text =~ /./;
     113        $thissection = $doc_obj->get_next_section ($thissection);
     114    }
     115    }
    103116    $self->extract_acronym_metadata($doc_obj);
    104117    $self->extract_keyphrase_metadata($doc_obj);
     
    107120    $self->extract_gis_metadata($doc_obj);
    108121
     122}
     123
     124
     125# FIRSTNNN: extract the first NNN characters as metadata
     126sub extract_first_NNNN_characters {
     127    my $self = shift (@_);
     128    my ($textref, $doc_obj, $thissection) = @_;
     129   
     130    foreach my $size (split /,/, $self->{'first'}) {
     131    my $tmptext =  $$textref;
     132    $tmptext =~ s/^\s+//;
     133    $tmptext =~ s/\s+$//;
     134    $tmptext =~ s/\s+/ /gs;
     135    $tmptext = substr ($tmptext, 0, $size);
     136    $tmptext =~ s/\s\S*$/…/;
     137    $doc_obj->add_utf8_metadata ($thissection, "First$size", $tmptext);
     138    }
    109139}
    110140
Note: See TracChangeset for help on using the changeset viewer.