Changeset 16012
- Timestamp:
- 2008-06-16T11:15:04+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/ReadTextFile.pm
Diff r15970 → r16012 (columns: old line, new line; lines marked "-" were removed)

 old  new
  70   70            'deft' => "en",
  71   71            'reqd' => "no" },
  72       -       { 'name' => "first",
  73       -         'desc' => "{ReadTextFile.first}",
  74       -         'type' => "string",
  75       -         'reqd' => "no" },
  76   72          { 'name' => "separate_cjk",
  77   73            'desc' => "{ReadTextFile.separate_cjk}",
   …    …
 196  192        return (1,$doc_obj);
 197  193    }
 198       -
 199       - sub auto_extract_metadata {
 200       -     my $self = shift(@_);
 201       -     my ($doc_obj) = @_;
 202       -
 203       -     if ($self->{'first'}) {
 204       -         my $thissection = $doc_obj->get_top_section();
 205       -         while (defined $thissection) {
 206       -             my $text = $doc_obj->get_text($thissection);
 207       -             $self->extract_first_NNNN_characters (\$text, $doc_obj, $thissection) if $text =~ /./;
 208       -             $thissection = $doc_obj->get_next_section ($thissection);
 209       -         }
 210       -     }
 211       -     $self->SUPER::auto_extract_metadata($doc_obj);
 212       - }
 213  194
 214  195    # uses the multiread package to read in the entire file pointed to
   …    …
 454  435
 455  436
 456       - # FIRSTNNN: extract the first NNN characters as metadata
 457       - sub extract_first_NNNN_characters {
 458       -     my $self = shift (@_);
 459       -     my ($textref, $doc_obj, $thissection) = @_;
 460       -
 461       -     foreach my $size (split /,/, $self->{'first'}) {
 462       -         my $tmptext = $$textref;
 463       -         $tmptext =~ s/^\s+//;
 464       -         $tmptext =~ s/\s+$//;
 465       -         $tmptext =~ s/\s+/ /gs;
 466       -         $tmptext = substr ($tmptext, 0, $size);
 467       -         $tmptext =~ s/\s\S*$/…/;
 468       -         $doc_obj->add_utf8_metadata ($thissection, "First$size", $tmptext);
 469       -     }
 470       - }
 471       -
 472  437    # Overridden by exploding plugins (eg. ISISPlug)
 473  438    sub clean_up_after_exploding
Note:
See TracChangeset
for help on using the changeset viewer.