Changeset 12424


Ignore:
Timestamp:
2006-08-09T15:16:46+12:00 (18 years ago)
Author:
mdewsnip
Message:

Turned "use strict" on and tried to tidy up some confusing code.

Location:
trunk/gsdl/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/lucenebuildproc.pm

    r12371 r12424  
    3333use mgppbuildproc;
    3434use ghtml;
     35use strict;
     36no strict 'refs'; # allow filehandles to be variables and vice versa
     37
    3538
    3639sub BEGIN {
     
    161164
    162165        my $new_text = "";
    163         my $tmp_text = "";
    164166
    165167        # we get allfields by default - do nothing
     
    171173        my $shortname = "";
    172174        my $metadata = $doc_obj->get_all_metadata ($section);
    173         foreach $pair (@$metadata) {
     175        foreach my $pair (@$metadata) {
    174176            my ($mfield, $mvalue) = (@$pair);
    175177            # check fields here, maybe others dont want - change to use dontindex!!
     
    209211       
    210212        my @metadata_list = ();
    211         foreach $submeta (split /,/, $real_field) {
     213        foreach my $submeta (split /,/, $real_field) {
    212214            if ($submeta eq "text") {
    213             if ($self->{'indexing_text'}) { #tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed
     215            my $section_text = $doc_obj->get_text($section);
     216            if ($self->{'indexing_text'}) {
     217                            # tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed
    214218                $new_text .= "$parastarttag<$shortname index=\"1\">\n";
    215                 $tmp_text .= $doc_obj->get_text ($section);
    216219                if ($parastarttag ne "") {
    217                 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, "</$shortname>$paraendtag$parastarttag<$shortname index=\"1\">");
    218                 } else {
     220                $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "</$shortname>$paraendtag$parastarttag<$shortname index=\"1\">");
     221                }
     222                else {
    219223                # we don't want to individually tag each paragraph if not doing para indexing
    220                 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, "");
    221                 }
    222                 $new_text .= "$tmp_text</$shortname>$paraendtag\n";
    223             }
    224             else { # leave html stuff in, but escape the tags, and dont add Paragraph tags - never retrieve paras at the moment
    225                 $tmp_text .= $doc_obj->get_text ($section);
    226                 &ghtml::htmlsafe($tmp_text);
    227                 $new_text .= $tmp_text;
    228                
     224                $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "");
     225                }
     226                $new_text .= "$section_text</$shortname>$paraendtag\n";
     227            }
     228            else {
     229                            # leave html stuff in, but escape the tags, and don't add Paragraph tags - never retrieve paras at the moment
     230                &ghtml::htmlsafe($section_text);
     231                $new_text .= $section_text;
    229232            }
    230233            }
  • trunk/gsdl/perllib/mgppbuildproc.pm

    r12371 r12424  
    3131
    3232use basebuildproc;
     33use strict;
     34no strict 'refs'; # allow filehandles to be variables and vice versa
     35
    3336
    3437BEGIN {
     
    257260       
    258261        my $new_text = "";
    259         my $tmp_text = "";
    260262
    261263        # we get allfields by default - do nothing
     
    305307        }
    306308        my @metadata_list = ();
    307         foreach $submeta (split /,/, $real_field) {
     309        foreach my $submeta (split /,/, $real_field) {
    308310            if ($submeta eq "text") {
    309             if ($self->{'indexing_text'}) { #tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed
     311            my $section_text = $doc_obj->get_text($section);
     312            if ($self->{'indexing_text'}) {
     313                # tag the text with <Text>...</Text>, add the <Paragraph> tags and strip out html if needed
    310314                $new_text .= "$paratag<$shortname>\n";
    311                 $tmp_text .= $doc_obj->get_text ($section);
    312315                if ($paratag ne "") {
    313                 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, "</$shortname>$paratag<$shortname>");
    314                 } else {
    315                 $tmp_text = $self->preprocess_text($tmp_text, $self->{'strip_html'}, "");
     316                $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "</$shortname>$paratag<$shortname>");
    316317                }
    317                 $new_text .= "$tmp_text</$shortname>\n";
     318                else {
     319                $section_text = $self->preprocess_text($section_text, $self->{'strip_html'}, "");
     320                }
     321                $new_text .= "$section_text</$shortname>\n";
    318322            }
    319             else { # leave html stuff in, and dont add Paragraph tags - never retrieve paras at the moment
    320                 $new_text .= $doc_obj->get_text ($section);
     323            else {
     324                            # leave html stuff in, and don't add Paragraph tags - never retrieve paras at the moment
     325                $new_text .= $section_text;
    321326            }
    322             } else {
     327            }
     328            else {
    323329            my @section_metadata = @{$doc_obj->get_metadata ($section, $submeta)};
    324330            if ($section ne $doc_obj->get_top_section() && $self->{'indexing_text'} && defined ($self->{'sections_index_document_metadata'})) {
Note: See TracChangeset for help on using the changeset viewer.