Changeset 5066


Ignore:
Timestamp:
2003-07-30T10:46:47+12:00 (21 years ago)
Author:
kjdon
Message:

changed HTMLPlug to extract multiple values for the same metadata name

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r4873 r5066  
    594594    my ($textref, $metadata, $doc_obj, $section) = @_;
    595595    my $outhandle = $self->{'outhandle'};
    596 
    597596    # if we don't want metadata, we may as well not be here ...
    598597    return if (!defined $self->{'metadata_fields'});
     
    617616    }
    618617    }
    619 
     618   
    620619    foreach my $field (split /,/, $self->{'metadata_fields'}) {
    621 
     620    my $found = 0;
    622621    # don't need to extract field if it was passed in from a previous
    623622    # (recursive) plugin
     
    625624
    626625    # see if there's a <meta> tag for this field
    627     if ($$textref =~ /<meta(\s*?)(?:name|http-equiv)\s*=\s*\"?$field\"?([^>]*)/is) {
     626    while ($$textref =~ /<meta(\s*?)(?:name|http-equiv)\s*=\s*\"?$field\"?([^>]*)/isg) {
    628627        my $content = $1 . $2;
    629628        if ($content =~ /content\s*=\s*\"?(.*)\"?/is) {
     
    636635            print $outhandle " extracted \"$field\" metadata \"$value\"\n"
    637636            if ($self->{'verbosity'} > 2);
    638             next;
     637            $found = 1;
    639638        }
    640639        }
    641640    }
    642    
     641    next if $found;
    643642    # TITLE: extract the document title
    644643   
     
    681680    }
    682681
    683 
    684682        # tag: extract the text between the first <H1> and </H1> tags
    685683        if ($field =~ /^tag[a-z0-9]+$/i) {
     
    704702            }
    705703            next;
    706         }
     704        }   
    707705    }
    708706}
Note: See TracChangeset for help on using the changeset viewer.