Changeset 897


Ignore:
Timestamp:
2000-02-02T14:30:47+13:00 (22 years ago)
Author:
sjboddie
Message:

lots of stuff

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r850 r897  
    6363    print STDERR "   -block_exp             Files matching this regular expression will be blocked from\n";
    6464    print STDERR "                          being passed to any further plugins in the list. By default\n";
    65     print STDERR "                          HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png, .pdf\n";
    66     print STDERR "                          or .rtf file extensions.\n";
     65    print STDERR "                          HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png, .pdf,\n";
     66    print STDERR "                          .rtf or .css file extensions.\n";
    6767    print STDERR "   -keep_head             Don't remove headers from html files.\n";
    6868    print STDERR "   -no_metadata           Don't attempt to extract any metadata from files.\n";
    6969    print STDERR "   -metadata_fields       Comma separated list of metadata fields to attempt to extract.\n";
    7070    print STDERR "                          Defaults to 'Title'\n";
    71     print STDERR "   -w3mir                 Set if w3mir was used to generate input file structure.\n\n";
     71    print STDERR "   -w3mir                 Set if w3mir was used to generate input file structure.\n";
     72    print STDERR "                          w3mir \n";
    7273}
    7374
     
    7980             q^process_exp/.*/(?i)\.html?$^, \$self->{'process_exp'},
    8081             q^nolinks^, \$self->{'nolinks'},
    81              q^block_exp/.*/(?i)\.(gif|jpe?g|png|pdf|rtf)$^, \$self->{'block_exp'},
     82             q^block_exp/.*/(?i)\.(gif|jpe?g|png|pdf|rtf|css)$^, \$self->{'block_exp'},
    8283             q^keep_head^, \$self->{'keep_head'},
    8384             q^no_metadata^, \$self->{'no_metadata'},
     
    159160        $self->replace_usemap_links($1, $2, $3)/isge;
    160161
    161     $text =~ s/(<(?:a|area|frame)\s+[^>]*?(?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
    162         $self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj)/isge;
     162    $text =~ s/(<(?:a|area|frame|link)\s+[^>]*?(?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
     163        $self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
    163164    }
    164165
    165166    # trap images
    166167    $text =~ s/(<img[^>]*?src\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
    167     $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj)/isge;
     168    $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
    168169
    169170    $doc_obj->add_text ($cursection, $text);
     
    180181sub replace_images {
    181182    my $self = shift (@_);
    182     my ($front, $link, $back, $base_dir, $file, $doc_obj) = @_;
     183    my ($front, $link, $back, $base_dir,
     184    $file, $doc_obj, $section) = @_;
    183185   
    184186    $link =~ s/\n/ /g;
    185187   
    186188    my ($href, $hash_part, $rl) = $self->format_link ($link, $base_dir, $file);
    187     return $front . $self->add_file ($href, $base_dir, $doc_obj) . $back;
     189    return $front . $self->add_file ($href, $base_dir, $doc_obj, $section) . $back;
    188190}
    189191
    190192sub replace_href_links {
    191193    my $self = shift (@_);
    192     my ($front, $link, $back, $base_dir, $file, $doc_obj) = @_;
     194    my ($front, $link, $back, $base_dir, $file, $doc_obj, $section) = @_;
    193195
    194196    # attempt to sort out targets - frames are not handled
     
    207209
    208210    my ($filename) = $href =~ /^(?:.*?):(?:\/\/)?(.*)/;
     211
     212    ##### Leave all these links alone (they won't be picked up by intermediate
     213    ##### pages). I think that's safest when dealing with frames, targets etc.
     214    ##### (at least until I think of a better way to do it). Problems occur with
     215    ##### mailto links inside small frames: the intermediate page is displayed
     216    ##### within that frame and can't be seen. This can still happen even with
     217    ##### html pages - the solution seems to be to somehow tell the browser from
     218    ##### the server side to display the page being sent (i.e. the intermediate
     219    ##### page) in the top level window - I'm not sure if that's possible. The
     220    ##### following line should probably be deleted if that can be done.
     221    return $front . $link . $back if $href =~ /^(mailto|news|gopher|nntp|telnet|javascript):/is;
     222
     223
    209224    if (($rl == 0) || ($filename =~ /$self->{'process_exp'}/) ||
    210225    ($href =~ /\/$/) || ($href =~ /^(mailto|news|gopher|nntp|telnet|javascript):/i)) {
    211     $link = $href . $hash_part;
    212     &html::urlsafe ($link);
    213     return $front . "_httpextlink_&href=" . $link . "&rl=" . $rl . $back;
     226    &html::urlsafe ($href);
     227    return $front . "_httpextlink_&rl=" . $rl . "&href=" . $href . $hash_part . $back;
    214228   
    215229    } else {
    216230    # link is to some other type of file (image, pdf etc.) so we'll
    217231    # need to associate that file
    218     return $front . $self->add_file ($href, $base_dir, $doc_obj) . $back;
     232    return $front . $self->add_file ($href, $base_dir, $doc_obj, $section) . $back;
    219233    }
    220234}
     
    222236sub add_file {
    223237    my $self = shift (@_);
    224     my ($href, $base_dir, $doc_obj) = @_;
     238    my ($href, $base_dir, $doc_obj, $section) = @_;
    225239    my ($newname);
    226240
     
    236250    $self->inc_filecount ();
    237251    }
    238     $doc_obj->associate_file($filename, $newname);
     252    $doc_obj->associate_file($filename, $newname, undef, $section);
    239253    return "_httpcollimg_/$newname";
    240254}
Note: See TracChangeset for help on using the changeset viewer.