Changeset 12424
- Timestamp: 2006-08-09T15:16:46+12:00 (18 years ago)
- Location: trunk/gsdl/perllib
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/lucenebuildproc.pm
r12371 r12424 33 33 use mgppbuildproc; 34 34 use ghtml; 35 use strict; 36 no strict 'refs'; # allow filehandles to be variables and viceversa 37 35 38 36 39 sub BEGIN { … … 161 164 162 165 my $new_text = ""; 163 my $tmp_text = "";164 166 165 167 # we get allfields by default - do nothing … … 171 173 my $shortname = ""; 172 174 my $metadata = $doc_obj->get_all_metadata ($section); 173 foreach $pair (@$metadata) {175 foreach my $pair (@$metadata) { 174 176 my ($mfield, $mvalue) = (@$pair); 175 177 # check fields here, maybe others dont want - change to use dontindex!! … … 209 211 210 212 my @metadata_list = (); 211 foreach $submeta (split /,/, $real_field) {213 foreach my $submeta (split /,/, $real_field) { 212 214 if ($submeta eq "text") { 213 if ($self->{'indexing_text'}) { #tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed 215 my $section_text = $doc_obj->get_text($section); 216 if ($self->{'indexing_text'}) { 217 # tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed 214 218 $new_text .= "$parastarttag<$shortname index=\"1\">\n"; 215 $tmp_text .= $doc_obj->get_text ($section);216 219 if ($parastarttag ne "") { 217 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, "</$shortname>$paraendtag$parastarttag<$shortname index=\"1\">"); 218 } else { 220 $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "</$shortname>$paraendtag$parastarttag<$shortname index=\"1\">"); 221 } 222 else { 219 223 # we don't want to individually tag each paragraph if not doing para indexing 220 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, ""); 221 } 222 $new_text .= "$tmp_text</$shortname>$paraendtag\n"; 223 } 224 else { # leave html stuff in, but escape the tags, and dont add Paragraph tags - never retrieve paras at the moment 225 $tmp_text .= $doc_obj->get_text ($section); 226 &ghtml::htmlsafe($tmp_text); 227 $new_text .= $tmp_text; 228 224 
$section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, ""); 225 } 226 $new_text .= "$section_text</$shortname>$paraendtag\n"; 227 } 228 else { 229 # leave html stuff in, but escape the tags, and don't add Paragraph tags - never retrieve paras at the moment 230 &ghtml::htmlsafe($section_text); 231 $new_text .= $section_text; 229 232 } 230 233 } -
trunk/gsdl/perllib/mgppbuildproc.pm
r12371 r12424 31 31 32 32 use basebuildproc; 33 use strict; 34 no strict 'refs'; # allow filehandles to be variables and viceversa 35 33 36 34 37 BEGIN { … … 257 260 258 261 my $new_text = ""; 259 my $tmp_text = "";260 262 261 263 # we get allfields by default - do nothing … … 305 307 } 306 308 my @metadata_list = (); 307 foreach $submeta (split /,/, $real_field) {309 foreach my $submeta (split /,/, $real_field) { 308 310 if ($submeta eq "text") { 309 if ($self->{'indexing_text'}) { #tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed 311 my $section_text = $doc_obj->get_text($section); 312 if ($self->{'indexing_text'}) { 313 # tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed 310 314 $new_text .= "$paratag<$shortname>\n"; 311 $tmp_text .= $doc_obj->get_text ($section);312 315 if ($paratag ne "") { 313 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, "</$shortname>$paratag<$shortname>"); 314 } else { 315 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, ""); 316 $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "</$shortname>$paratag<$shortname>"); 316 317 } 317 $new_text .= "$tmp_text</$shortname>\n"; 318 else { 319 $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, ""); 320 } 321 $new_text .= "$section_text</$shortname>\n"; 318 322 } 319 else { # leave html stuff in, and dont add Paragraph tags - never retrieve paras at the moment 320 $new_text .= $doc_obj->get_text ($section); 323 else { 324 # leave html stuff in, and don't add Paragraph tags - never retrieve paras at the moment 325 $new_text .= $section_text; 321 326 } 322 } else { 327 } 328 else { 323 329 my @section_metadata = @{$doc_obj->get_metadata ($section, $submeta)}; 324 330 if ($section ne $doc_obj->get_top_section() && $self->{'indexing_text'} && defined ($self->{'sections_index_document_metadata'})) {
Note: See TracChangeset for help on using the changeset viewer.