Changeset 2819


Ignore:
Timestamp:
2001-11-05T22:49:46+13:00 (22 years ago)
Author:
sjboddie
Message:

Altered HTMLPlug's description_tags option a bit so it should now also
work for plugins derived from HTMLPlug (e.g. RTFPlug, WordPlug, PDFPlug,
PSPlug). Tested briefly with RTFPlug and WordPlug and it seems ok.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r2817 r2819  
    156156    if ($self->{'description_tags'}) {
    157157
     158    my $opencom = '(?:<!--|&lt;!(?:&mdash;|&#151;|--))';
     159    my $closecom = '(?:-->|(?:&mdash;|&#151;|--)&gt;)';
     160    my $lt = '(?:<|&lt;)';
     161    my $gt = '(?:>|&gt;)';
     162    my $quot = '(?:"|&quot;|&rdquo;|&ldquo;)';
     163
    158164    my $found_something = 0; my $top = 1;
    159     while ($$textref =~ s/^(.*?)<!--(.*?)-->//s) {
     165    while ($$textref =~ s/^(.*?)$opencom(.*?)$closecom//s) {
    160166        my $text = $1;
    161167        my $comment = $2;
     
    163169        $self->process_section(\$text, $base_dir, $file, $doc_obj, $cursection);
    164170        }
    165         while ($comment =~ s/<([^>]+)>//s) {
     171        while ($comment =~ s/$lt(.*?)$gt//s) {
     172       
    166173        my $tag = $1;
    167174        if ($tag eq "Section") {
     
    172179            $found_something = 1;
    173180            $cursection = $doc_obj->get_parent_section ($cursection);
    174         } elsif ($tag =~ /^Metadata name=\"([^\"]+)\"/s) {
     181        } elsif ($tag =~ /^Metadata name=$quot(.*?)$quot/s) {
    175182            my $metaname = $1;
    176             $comment =~ s/^(.*?)<\/Metadata>//s;
     183            $comment =~ s/^(.*?)$lt\/Metadata$gt//s;
    177184            my $metavalue = $1;
    178185            $metavalue =~ s/^\s+//;
    179186            $metavalue =~ s/\s+$//;
     187                    # assume that no metadata value intentionally includes
     188                    # carriage returns or HTML tags (if they're there they
     189                    # were probably introduced when converting to HTML from
     190                    # some other format).
     191            $metavalue =~ s/[\cJ\cM]/ /sg;
     192            $metavalue =~ s/<[^>]+>//sg;
     193            $metavalue =~ s/\s+/ /sg;
    180194            $doc_obj->set_utf8_metadata_element($cursection, $metaname, $metavalue);
     195        } elsif ($tag eq "Description" || $tag eq "/Description") {
     196            # do nothing with containing Description tags
     197        } else {
     198            # simple HTML tag (probably created by the conversion
     199            # to HTML from some other format) - we'll ignore it and
     200            # hope for the best ;-)
    181201        }
    182202        }
     
    197217        print $outhandle "          of the final closing </Section> tag. This text will\n";
    198218        print $outhandle "          be ignored.";
     219        my ($text);
    199220        if (length($$textref) > 30) {
    200221            $text = substr($$textref, 0, 30) . "...";
     222        } else {
     223            $text = $$textref;
    201224        }
    202225        $text =~ s/\n/ /isg;
Note: See TracChangeset for help on using the changeset viewer.