Changeset 617
- Timestamp:
- 1999-09-22T10:00:53+12:00 (25 years ago)
- Location:
- trunk/gsdl/perllib/plugins
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/GBTPlug.pm
r592 r617
 96  96     $text = &cnseg::segment($text);
 97  97
 98      -  $doc_obj->add_utf8_text($cursection, $text);
     98  +  $doc_obj->add_utf8_text($cursection, "<pre>$text</pre>");
 99  99
100 100     # add Title metadata (filename);
-
trunk/gsdl/perllib/plugins/HTMLPlug.pm
r589 r617
 93  93     my $text = "";
 94  94     my $line = "";
 95      -  my $donehead = 0;
 96  95     my $title = "";
 97  96     while (defined ($line = <FILE>)) {
  …   …
 99  98     }
100  99
    100  +  # we'll use the worthless alarm thingy to temporarily replace
    101  +  # '\n' so we'd better check it doesn't occur naturally
    102  +  if ($text =~ /\a/) {
    103  +      print STDERR "HTMLPlug::read - 'WARNING '\a' character occurs in text!!\n";
    104  +  }
    105  +
101 106     # remove line breaks
102      -  $text =~ s/\s+/ /g;
    107  +  $text =~ s/\n/\a/g;
103 108
104 109     # see if there's a <title> tag
  …   …
124 129     $text =~ s/^.*?<body[^>]*>//i;
125 130
    131  +  # and any other unwanted tags
    132  +  $text =~ s/<(\/p|\/html|\/body)>//g;
    133  +
126 134     # fix up the image links
127 135     $text =~ s/(<img[^>]*?src\s*=\s*\"?)([^\">]+)(\"?[^>]*>)/
128 136         &replace_image_links($absdir, $doc_obj, $1, $2, $3)/ige;
129 137
130      -  # add a newline at the beginning of each paragraph
131      -  $text =~ s/(.)\s*<p\b/$1\n\n<p/gi;
132      -
133      -  # add a newline every 80 characters at a word boundary
134      -  # Note: this regular expression puts a line feed before
135      -  # the last word in each section, even when it is not
136      -  # needed.
137      -  $text =~ s/(.{1,80})\s/$1\n/g;
    138  +  # put line breaks back in
    139  +  $text =~ s/\a/\n/g;
138 140
139 141     $doc_obj->add_text ($cursection, $text);
Note:
See TracChangeset
for help on using the changeset viewer.