Changeset 617


Ignore:
Timestamp:
1999-09-22T10:00:53+12:00 (25 years ago)
Author:
sjboddie
Message:

a few fixes

Location:
trunk/gsdl/perllib/plugins
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/GBTPlug.pm

    r592 r617  
    96 96    $text = &cnseg::segment($text);
    97 97
    98     $doc_obj->add_utf8_text($cursection, $text);
     98    $doc_obj->add_utf8_text($cursection, "<pre>$text</pre>");
    99 99
    100 100    # add Title metadata (filename);
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r589 r617  
    93 93    my $text = "";
    94 94    my $line = "";
    95     my $donehead = 0;
    96 95    my $title = "";
    97 96    while (defined ($line = <FILE>)) {
     
    99 98    }
    100 99
     100    # we'll use the worthless alarm thingy to temporarily replace
     101    # '\n' so we'd better check it doesn't occur naturally
     102    if ($text =~ /\a/) {
     103    print STDERR "HTMLPlug::read - 'WARNING '\a' character occurs in text!!\n";
     104    }
     105
    101 106    # remove line breaks
    102     $text =~ s/\s+/ /g;
     107    $text =~ s/\n/\a/g;
    103 108
    104 109    # see if there's a <title> tag
     
    124 129    $text =~ s/^.*?<body[^>]*>//i;
    125 130
     131    # and any other unwanted tags
     132    $text =~ s/<(\/p|\/html|\/body)>//g;
     133
    126 134    # fix up the image links
    127 135    $text =~ s/(<img[^>]*?src\s*=\s*\"?)([^\">]+)(\"?[^>]*>)/
    128 136    &replace_image_links($absdir, $doc_obj, $1, $2, $3)/ige;
    129 137
    130     # add a newline at the beginning of each paragraph
    131     $text =~ s/(.)\s*<p\b/$1\n\n<p/gi;
    132    
    133     # add a newline every 80 characters at a word boundary
    134     # Note: this regular expression puts a line feed before
    135     # the last word in each section, even when it is not
    136     # needed.
    137     $text =~ s/(.{1,80})\s/$1\n/g;
     138    # put line breaks back in
     139    $text =~ s/\a/\n/g;
    138 140
    139 141    $doc_obj->add_text ($cursection, $text);
Note: See TracChangeset for help on using the changeset viewer.