Changeset 5066
- Timestamp: 2003-07-30T10:46:47+12:00 (21 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/HTMLPlug.pm
r4873 r5066 594 594 my ($textref, $metadata, $doc_obj, $section) = @_; 595 595 my $outhandle = $self->{'outhandle'}; 596 597 596 # if we don't want metadata, we may as well not be here ... 598 597 return if (!defined $self->{'metadata_fields'}); … … 617 616 } 618 617 } 619 618 620 619 foreach my $field (split /,/, $self->{'metadata_fields'}) { 621 620 my $found = 0; 622 621 # don't need to extract field if it was passed in from a previous 623 622 # (recursive) plugin … … 625 624 626 625 # see if there's a <meta> tag for this field 627 if ($$textref =~ /<meta(\s*?)(?:name|http-equiv)\s*=\s*\"?$field\"?([^>]*)/is) {626 while ($$textref =~ /<meta(\s*?)(?:name|http-equiv)\s*=\s*\"?$field\"?([^>]*)/isg) { 628 627 my $content = $1 . $2; 629 628 if ($content =~ /content\s*=\s*\"?(.*)\"?/is) { … … 636 635 print $outhandle " extracted \"$field\" metadata \"$value\"\n" 637 636 if ($self->{'verbosity'} > 2); 638 next;637 $found = 1; 639 638 } 640 639 } 641 640 } 642 641 next if $found; 643 642 # TITLE: extract the document title 644 643 … … 681 680 } 682 681 683 684 682 # tag: extract the text between the first <H1> and </H1> tags 685 683 if ($field =~ /^tag[a-z0-9]+$/i) { … … 704 702 } 705 703 next; 706 } 704 } 707 705 } 708 706 }
Note: See TracChangeset for help on using the changeset viewer.