Changeset 897 for trunk/gsdl/perllib/plugins/HTMLPlug.pm
- Timestamp: 2000-02-02T14:30:47+13:00 (24 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/plugins/HTMLPlug.pm
r850 r897 63 63 print STDERR " -block_exp Files matching this regular expression will be blocked from\n"; 64 64 print STDERR " being passed to any further plugins in the list. By default\n"; 65 print STDERR " HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png, .pdf \n";66 print STDERR " or .rtffile extensions.\n";65 print STDERR " HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png, .pdf,\n"; 66 print STDERR " .rtf or .css file extensions.\n"; 67 67 print STDERR " -keep_head Don't remove headers from html files.\n"; 68 68 print STDERR " -no_metadata Don't attempt to extract any metadata from files.\n"; 69 69 print STDERR " -metadata_fields Comma separated list of metadata fields to attempt to extract.\n"; 70 70 print STDERR " Defaults to 'Title'\n"; 71 print STDERR " -w3mir Set if w3mir was used to generate input file structure.\n\n"; 71 print STDERR " -w3mir Set if w3mir was used to generate input file structure.\n"; 72 print STDERR " w3mir \n"; 72 73 } 73 74 … … 79 80 q^process_exp/.*/(?i)\.html?$^, \$self->{'process_exp'}, 80 81 q^nolinks^, \$self->{'nolinks'}, 81 q^block_exp/.*/(?i)\.(gif|jpe?g|png|pdf|rtf )$^, \$self->{'block_exp'},82 q^block_exp/.*/(?i)\.(gif|jpe?g|png|pdf|rtf|css)$^, \$self->{'block_exp'}, 82 83 q^keep_head^, \$self->{'keep_head'}, 83 84 q^no_metadata^, \$self->{'no_metadata'}, … … 159 160 $self->replace_usemap_links($1, $2, $3)/isge; 160 161 161 $text =~ s/(<(?:a|area|frame )\s+[^>]*?(?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/162 $self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj )/isge;162 $text =~ s/(<(?:a|area|frame|link)\s+[^>]*?(?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/ 163 $self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 163 164 } 164 165 165 166 # trap images 166 167 $text =~ s/(<img[^>]*?src\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/ 167 $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj )/isge;168 $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, 
$cursection)/isge; 168 169 169 170 $doc_obj->add_text ($cursection, $text); … … 180 181 sub replace_images { 181 182 my $self = shift (@_); 182 my ($front, $link, $back, $base_dir, $file, $doc_obj) = @_; 183 my ($front, $link, $back, $base_dir, 184 $file, $doc_obj, $section) = @_; 183 185 184 186 $link =~ s/\n/ /g; 185 187 186 188 my ($href, $hash_part, $rl) = $self->format_link ($link, $base_dir, $file); 187 return $front . $self->add_file ($href, $base_dir, $doc_obj ) . $back;189 return $front . $self->add_file ($href, $base_dir, $doc_obj, $section) . $back; 188 190 } 189 191 190 192 sub replace_href_links { 191 193 my $self = shift (@_); 192 my ($front, $link, $back, $base_dir, $file, $doc_obj ) = @_;194 my ($front, $link, $back, $base_dir, $file, $doc_obj, $section) = @_; 193 195 194 196 # attempt to sort out targets - frames are not handled … … 207 209 208 210 my ($filename) = $href =~ /^(?:.*?):(?:\/\/)?(.*)/; 211 212 ##### leave all these links alone (they won't be picked up by intermediate 213 ##### pages). I think that's safest when dealing with frames, targets etc. 214 ##### (at least until I think of a better way to do it). Problems occur with 215 ##### mailto links from within small frames, the intermediate page is displayed 216 ##### within that frame and can't be seen. There is still potential for this to 217 ##### happen even with html pages - the solution seems to be to somehow tell 218 ##### the browser from the server side to display the page being sent (i.e. 219 ##### the intermediate page) in the top level window - I'm not sure if that's 220 ##### possible - the following line should probably be deleted if that can be done 221 return $front . $link . $back if $href =~ /^(mailto|news|gopher|nntp|telnet|javascript):/is; 222 223 209 224 if (($rl == 0) || ($filename =~ /$self->{'process_exp'}/) || 210 225 ($href =~ /\/$/) || ($href =~ /^(mailto|news|gopher|nntp|telnet|javascript):/i)) { 211 $link = $href . 
$hash_part; 212 &html::urlsafe ($link); 213 return $front . "_httpextlink_&href=" . $link . "&rl=" . $rl . $back; 226 &html::urlsafe ($href); 227 return $front . "_httpextlink_&rl=" . $rl . "&href=" . $href . $hash_part . $back; 214 228 215 229 } else { 216 230 # link is to some other type of file (image, pdf etc.) so we'll 217 231 # need to associate that file 218 return $front . $self->add_file ($href, $base_dir, $doc_obj ) . $back;232 return $front . $self->add_file ($href, $base_dir, $doc_obj, $section) . $back; 219 233 } 220 234 } … … 222 236 sub add_file { 223 237 my $self = shift (@_); 224 my ($href, $base_dir, $doc_obj ) = @_;238 my ($href, $base_dir, $doc_obj, $section) = @_; 225 239 my ($newname); 226 240 … … 236 250 $self->inc_filecount (); 237 251 } 238 $doc_obj->associate_file($filename, $newname );252 $doc_obj->associate_file($filename, $newname, undef, $section); 239 253 return "_httpcollimg_/$newname"; 240 254 }
Note: See TracChangeset for help on using the changeset viewer.