Changeset 12424 for trunk/gsdl/perllib/lucenebuildproc.pm
- Timestamp: 2006-08-09T15:16:46+12:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/lucenebuildproc.pm
r12371 r12424 33 33 use mgppbuildproc; 34 34 use ghtml; 35 use strict; 36 no strict 'refs'; # allow filehandles to be variables and viceversa 37 35 38 36 39 sub BEGIN { … … 161 164 162 165 my $new_text = ""; 163 my $tmp_text = "";164 166 165 167 # we get allfields by default - do nothing … … 171 173 my $shortname = ""; 172 174 my $metadata = $doc_obj->get_all_metadata ($section); 173 foreach $pair (@$metadata) {175 foreach my $pair (@$metadata) { 174 176 my ($mfield, $mvalue) = (@$pair); 175 177 # check fields here, maybe others dont want - change to use dontindex!! … … 209 211 210 212 my @metadata_list = (); 211 foreach $submeta (split /,/, $real_field) {213 foreach my $submeta (split /,/, $real_field) { 212 214 if ($submeta eq "text") { 213 if ($self->{'indexing_text'}) { #tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed 215 my $section_text = $doc_obj->get_text($section); 216 if ($self->{'indexing_text'}) { 217 # tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed 214 218 $new_text .= "$parastarttag<$shortname index=\"1\">\n"; 215 $tmp_text .= $doc_obj->get_text ($section);216 219 if ($parastarttag ne "") { 217 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, "</$shortname>$paraendtag$parastarttag<$shortname index=\"1\">"); 218 } else { 220 $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "</$shortname>$paraendtag$parastarttag<$shortname index=\"1\">"); 221 } 222 else { 219 223 # we don't want to individually tag each paragraph if not doing para indexing 220 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, ""); 221 } 222 $new_text .= "$tmp_text</$shortname>$paraendtag\n"; 223 } 224 else { # leave html stuff in, but escape the tags, and dont add Paragraph tags - never retrieve paras at the moment 225 $tmp_text .= $doc_obj->get_text ($section); 226 &ghtml::htmlsafe($tmp_text); 227 $new_text .= $tmp_text; 228 224 
$section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, ""); 225 } 226 $new_text .= "$section_text</$shortname>$paraendtag\n"; 227 } 228 else { 229 # leave html stuff in, but escape the tags, and don't add Paragraph tags - never retrieve paras at the moment 230 &ghtml::htmlsafe($section_text); 231 $new_text .= $section_text; 229 232 } 230 233 }
Note: See TracChangeset for help on using the changeset viewer.