Changeset 16011
- Timestamp:
- 2008-06-16T11:14:30+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/AutoExtractMetadata.pm
r15919 r16011 45 45 } 46 46 47 my $arguments = []; 47 my $arguments = [ 48 {'name' => "first", 49 'desc' => "{AutoExtractMetadata.first}", 50 'type' => "string", 51 'reqd' => "no" } 52 ]; 48 53 49 54 … … 96 101 } 97 102 98 # here is where we call methods from the supporting plugins - gis and textextract 103 # here is where we call methods from the supporting extractor plugins 99 104 sub auto_extract_metadata { 100 105 my $self = shift(@_); 101 106 my ($doc_obj) = @_; 102 107 108 if ($self->{'first'}) { 109 my $thissection = $doc_obj->get_top_section(); 110 while (defined $thissection) { 111 my $text = $doc_obj->get_text($thissection); 112 $self->extract_first_NNNN_characters (\$text, $doc_obj, $thissection) if $text =~ /./; 113 $thissection = $doc_obj->get_next_section ($thissection); 114 } 115 } 103 116 $self->extract_acronym_metadata($doc_obj); 104 117 $self->extract_keyphrase_metadata($doc_obj); … … 107 120 $self->extract_gis_metadata($doc_obj); 108 121 122 } 123 124 125 # FIRSTNNN: extract the first NNN characters as metadata 126 sub extract_first_NNNN_characters { 127 my $self = shift (@_); 128 my ($textref, $doc_obj, $thissection) = @_; 129 130 foreach my $size (split /,/, $self->{'first'}) { 131 my $tmptext = $$textref; 132 $tmptext =~ s/^\s+//; 133 $tmptext =~ s/\s+$//; 134 $tmptext =~ s/\s+/ /gs; 135 $tmptext = substr ($tmptext, 0, $size); 136 $tmptext =~ s/\s\S*$/…/; 137 $doc_obj->add_utf8_metadata ($thissection, "First$size", $tmptext); 138 } 109 139 } 110 140
Note:
See TracChangeset
for help on using the changeset viewer.