Changeset 16012


Ignore:
Timestamp:
2008-06-16T11:15:04+12:00 (16 years ago)
Author:
kjdon
Message:

Moved the -first option (and its supporting code) from ReadTextFile to AutoExtractMetadata.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/ReadTextFile.pm

    r15970 r16012  
    7070    'deft' => "en",
    7171    'reqd' => "no" },
    72        { 'name' => "first",
    73     'desc' => "{ReadTextFile.first}",
    74     'type' => "string",
    75     'reqd' => "no" },
    7672     { 'name' => "separate_cjk",
    7773    'desc' => "{ReadTextFile.separate_cjk}",
     
    196192    return (1,$doc_obj);
    197193}
    198 
    199 sub auto_extract_metadata {
    200     my $self = shift(@_);
    201     my ($doc_obj) = @_;
    202 
    203     if ($self->{'first'}) {
    204     my $thissection = $doc_obj->get_top_section();
    205     while (defined $thissection) {
    206         my $text = $doc_obj->get_text($thissection);
    207         $self->extract_first_NNNN_characters (\$text, $doc_obj, $thissection) if $text =~ /./;
    208         $thissection = $doc_obj->get_next_section ($thissection);
    209     }
    210     }
    211     $self->SUPER::auto_extract_metadata($doc_obj);
    212 
    213194
    214195# uses the multiread package to read in the entire file pointed to
     
    454435
    455436
    456 # FIRSTNNN: extract the first NNN characters as metadata
    457 sub extract_first_NNNN_characters {
    458     my $self = shift (@_);
    459     my ($textref, $doc_obj, $thissection) = @_;
    460    
    461     foreach my $size (split /,/, $self->{'first'}) {
    462     my $tmptext =  $$textref;
    463     $tmptext =~ s/^\s+//;
    464     $tmptext =~ s/\s+$//;
    465     $tmptext =~ s/\s+/ /gs;
    466     $tmptext = substr ($tmptext, 0, $size);
    467     $tmptext =~ s/\s\S*$/…/;
    468     $doc_obj->add_utf8_metadata ($thissection, "First$size", $tmptext);
    469     }
    470 }
    471 
    472437# Overridden by exploding plugins (eg. ISISPlug)
    473438sub clean_up_after_exploding
Note: See TracChangeset for help on using the changeset viewer.