Changeset 897

Show
Ignore:
Timestamp:
02.02.2000 14:30:47 (20 years ago)
Author:
sjboddie
Message:

lots of stuff

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r850 r897  
    63 63    print STDERR "   -block_exp             Files matching this regular expression will be blocked from\n"; 
    64 64    print STDERR "                          being passed to any further plugins in the list. By default\n"; 
    65     print STDERR "                          HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png, .pdf\n"; 
    66     print STDERR "                          or .rtf file extensions.\n"; 
     65    print STDERR "                          HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png, .pdf,\n"; 
     66    print STDERR "                          .rtf or .css file extensions.\n"; 
    67 67    print STDERR "   -keep_head             Don't remove headers from html files.\n"; 
    68 68    print STDERR "   -no_metadata           Don't attempt to extract any metadata from files.\n"; 
    69 69    print STDERR "   -metadata_fields       Comma separated list of metadata fields to attempt to extract.\n"; 
    70 70    print STDERR "                          Defaults to 'Title'\n"; 
    71     print STDERR "   -w3mir                 Set if w3mir was used to generate input file structure.\n\n"; 
     71    print STDERR "   -w3mir                 Set if w3mir was used to generate input file structure.\n"; 
     72    print STDERR "                          w3mir \n"; 
    72 73} 
    73 74 
     
    79 80             q^process_exp/.*/(?i)\.html?$^, \$self->{'process_exp'}, 
    80 81             q^nolinks^, \$self->{'nolinks'}, 
    81              q^block_exp/.*/(?i)\.(gif|jpe?g|png|pdf|rtf)$^, \$self->{'block_exp'}, 
     82             q^block_exp/.*/(?i)\.(gif|jpe?g|png|pdf|rtf|css)$^, \$self->{'block_exp'}, 
    82 83             q^keep_head^, \$self->{'keep_head'}, 
    83 84             q^no_metadata^, \$self->{'no_metadata'}, 
     
    159 160        $self->replace_usemap_links($1, $2, $3)/isge; 
    160 161 
    161     $text =~ s/(<(?:a|area|frame)\s+[^>]*?(?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/ 
    162         $self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj)/isge; 
     162    $text =~ s/(<(?:a|area|frame|link)\s+[^>]*?(?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/ 
     163        $self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 
    163 164    } 
    164 165 
    165 166    # trap images 
    166 167    $text =~ s/(<img[^>]*?src\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/ 
    167     $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj)/isge; 
     168    $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 
    168 169 
    169 170    $doc_obj->add_text ($cursection, $text); 
     
    180 181sub replace_images { 
    181 182    my $self = shift (@_); 
    182     my ($front, $link, $back, $base_dir, $file, $doc_obj) = @_; 
     183    my ($front, $link, $back, $base_dir,  
     184    $file, $doc_obj, $section) = @_; 
    183 185     
    184 186    $link =~ s/\n/ /g; 
    185 187     
    186 188    my ($href, $hash_part, $rl) = $self->format_link ($link, $base_dir, $file); 
    187     return $front . $self->add_file ($href, $base_dir, $doc_obj) . $back; 
     189    return $front . $self->add_file ($href, $base_dir, $doc_obj, $section) . $back; 
    188 190} 
    189 191 
    190 192sub replace_href_links { 
    191 193    my $self = shift (@_); 
    192     my ($front, $link, $back, $base_dir, $file, $doc_obj) = @_; 
     194    my ($front, $link, $back, $base_dir, $file, $doc_obj, $section) = @_; 
    193 195 
    194 196    # attempt to sort out targets - frames are not handled  
     
    207 209 
    208 210    my ($filename) = $href =~ /^(?:.*?):(?:\/\/)?(.*)/; 
     211 
     212    ##### leave all these links alone (they won't be picked up by intermediate  
     213    ##### pages). I think that's safest when dealing with frames, targets etc. 
     214    ##### (at least until I think of a better way to do it). Problems occur with 
     215    ##### mailto links from within small frames, the intermediate page is displayed 
     216    ##### within that frame and can't be seen. There is still potential for this to 
     217    ##### happen even with html pages - the solution seems to be to somehow tell 
     218    ##### the browser from the server side to display the page being sent (i.e.  
     219    ##### the intermediate page) in the top level window - I'm not sure if that's  
     220    ##### possible - the following line should probably be deleted if that can be done 
     221    return $front . $link . $back if $href =~ /^(mailto|news|gopher|nntp|telnet|javascript):/is; 
     222 
     223 
    209 224    if (($rl == 0) || ($filename =~ /$self->{'process_exp'}/) ||  
    210 225    ($href =~ /\/$/) || ($href =~ /^(mailto|news|gopher|nntp|telnet|javascript):/i)) { 
    211     $link = $href . $hash_part; 
    212     &html::urlsafe ($link); 
    213     return $front . "_httpextlink_&href=" . $link . "&rl=" . $rl . $back; 
     226    &html::urlsafe ($href); 
     227    return $front . "_httpextlink_&rl=" . $rl . "&href=" . $href . $hash_part . $back; 
    214 228     
    215 229    } else { 
    216 230    # link is to some other type of file (image, pdf etc.) so we'll 
    217 231    # need to associate that file 
    218     return $front . $self->add_file ($href, $base_dir, $doc_obj) . $back; 
     232    return $front . $self->add_file ($href, $base_dir, $doc_obj, $section) . $back; 
    219 233    } 
    220 234} 
     
    222 236sub add_file { 
    223 237    my $self = shift (@_); 
    224     my ($href, $base_dir, $doc_obj) = @_; 
     238    my ($href, $base_dir, $doc_obj, $section) = @_; 
    225 239    my ($newname); 
    226 240 
     
    236 250    $self->inc_filecount (); 
    237 251    } 
    238     $doc_obj->associate_file($filename, $newname); 
     252    $doc_obj->associate_file($filename, $newname, undef, $section); 
    239 253    return "_httpcollimg_/$newname"; 
    240 254}