1 | package HTML::Filter;
|
---|
2 |
|
---|
3 | use strict;
|
---|
4 | use vars qw(@ISA $VERSION);
|
---|
5 |
|
---|
6 | require HTML::Parser;
|
---|
7 | @ISA=qw(HTML::Parser);
|
---|
8 |
|
---|
# Derive $VERSION from the version-control "Revision" keyword.
#
# The keyword may expand to "major.minor" or to a bare integer (as it has
# here).  The previous pattern required a dot, so with a bare integer the
# match failed, sprintf received no arguments and $VERSION silently became
# "0.00".  The minor part is now optional and defaults to 0; a dotted
# revision still yields exactly the same value as before.
$VERSION = do {
    my ($major, $minor) = q$Revision: 14078 $ =~ /(\d+)(?:\.(\d+))?/;
    sprintf("%d.%02d", $major || 0, $minor || 0);
};
|
---|
10 |
|
---|
# Parser callback for a markup declaration.
#
# Takes the invocant and the declaration text (first argument after the
# invocant), wraps the text in "<!" ... ">" and passes the result to the
# invocant's output() method.  Returns whatever output() returns.
sub declaration {
    my ($self, $decl) = @_;
    return $self->output("<!${decl}>");
}
|
---|
# Parser callback for a processing instruction.
#
# Only the second argument after the invocant is used; it is forwarded
# unchanged to the invocant's output() method.  (This module's POD describes
# the default behaviour as printing the original text, so that slot is
# presumably the original source text -- confirm against HTML::Parser.)
# Returns whatever output() returns.
sub process {
    my ($self, undef, $origtext) = @_;
    return $self->output($origtext);
}
|
---|
# Parser callback for an HTML comment.
#
# Takes the invocant and the comment text (first argument after the
# invocant), re-wraps the text in "<!--" ... "-->" and passes the result to
# the invocant's output() method.  Returns whatever output() returns.
sub comment {
    my ($self, $comment) = @_;
    return $self->output("<!--${comment}-->");
}
|
---|
# Parser callback for a start tag.
#
# Only the fourth argument after the invocant is used; it is forwarded
# unchanged to the invocant's output() method.  (This module's POD describes
# the default behaviour as printing the original text of each element, so
# that slot is presumably the tag's original source text -- confirm against
# HTML::Parser.)  Returns whatever output() returns.
sub start {
    my $self = shift;
    my $origtext = $_[3];
    return $self->output($origtext);
}
|
---|
# Parser callback for an end tag.
#
# Only the second argument after the invocant is used; it is forwarded
# unchanged to the invocant's output() method.  (This module's POD describes
# the default behaviour as printing the original text of each element, so
# that slot is presumably the tag's original source text -- confirm against
# HTML::Parser.)  Returns whatever output() returns.
sub end {
    my ($self, undef, $origtext) = @_;
    return $self->output($origtext);
}
|
---|
# Parser callback for plain text.
#
# Forwards the first argument after the invocant, unchanged, to the
# invocant's output() method.  Returns whatever output() returns.
sub text {
    my ($self, $chunk) = @_;
    return $self->output($chunk);
}
|
---|
17 |
|
---|
# Sink for all filtered HTML.
#
# Prints the first argument after the invocant to the currently selected
# output filehandle; the invocant itself is not used.  Every callback in this
# package funnels its text through this method, so subclasses can override it
# to redirect or suppress output.  Returns the result of print.
sub output {
    my (undef, $html) = @_;
    return print $html;
}
|
---|
19 |
|
---|
20 | 1;
|
---|
21 |
|
---|
22 | __END__
|
---|
23 |
|
---|
24 | =head1 NAME
|
---|
25 |
|
---|
26 | HTML::Filter - Filter HTML text through the parser
|
---|
27 |
|
---|
28 | =head1 NOTE
|
---|
29 |
|
---|
30 | B<This module is deprecated.> The C<HTML::Parser> now provides the
|
---|
31 | functionality of C<HTML::Filter> much more efficiently with the
|
---|
32 | C<default> handler.
|
---|
33 |
|
---|
34 | =head1 SYNOPSIS
|
---|
35 |
|
---|
36 | require HTML::Filter;
|
---|
37 | $p = HTML::Filter->new->parse_file("index.html");
|
---|
38 |
|
---|
39 | =head1 DESCRIPTION
|
---|
40 |
|
---|
41 | C<HTML::Filter> is an HTML parser that by default prints the
|
---|
42 | original text of each HTML element (a slow version of cat(1) basically).
|
---|
43 | The callback methods may be overridden to modify the filtering for some
|
---|
44 | HTML elements and you can override the output() method, which is called to
|
---|
45 | print the HTML text.
|
---|
46 |
|
---|
47 | C<HTML::Filter> is a subclass of C<HTML::Parser>. This means that
|
---|
48 | the document should be given to the parser by calling the $p->parse()
|
---|
49 | or $p->parse_file() methods.
|
---|
50 |
|
---|
51 | =head1 EXAMPLES
|
---|
52 |
|
---|
53 | The first example is a filter that will remove all comments from an
|
---|
54 | HTML file. This is achieved by simply overriding the comment method
|
---|
55 | to do nothing.
|
---|
56 |
|
---|
57 | package CommentStripper;
|
---|
58 | require HTML::Filter;
|
---|
59 | @ISA=qw(HTML::Filter);
|
---|
60 | sub comment { } # ignore comments
|
---|
61 |
|
---|
62 | The second example shows a filter that will remove any E<lt>TABLE>s
|
---|
63 | found in the HTML file. We specialize the start() and end() methods
|
---|
64 | to count table tags and then make output not happen when inside a
|
---|
65 | table.
|
---|
66 |
|
---|
67 | package TableStripper;
|
---|
68 | require HTML::Filter;
|
---|
69 | @ISA=qw(HTML::Filter);
|
---|
70 | sub start
|
---|
71 | {
|
---|
72 | my $self = shift;
|
---|
73 | $self->{table_seen}++ if $_[0] eq "table";
|
---|
74 | $self->SUPER::start(@_);
|
---|
75 | }
|
---|
76 |
|
---|
77 | sub end
|
---|
78 | {
|
---|
79 | my $self = shift;
|
---|
80 | $self->SUPER::end(@_);
|
---|
81 | $self->{table_seen}-- if $_[0] eq "table";
|
---|
82 | }
|
---|
83 |
|
---|
84 | sub output
|
---|
85 | {
|
---|
86 | my $self = shift;
|
---|
87 | unless ($self->{table_seen}) {
|
---|
88 | $self->SUPER::output(@_);
|
---|
89 | }
|
---|
90 | }
|
---|
91 |
|
---|
92 | If you want to collect the parsed text internally you might want to do
|
---|
93 | something like this:
|
---|
94 |
|
---|
95 | package FilterIntoString;
|
---|
96 | require HTML::Filter;
|
---|
97 | @ISA=qw(HTML::Filter);
|
---|
98 | sub output { push(@{$_[0]->{fhtml}}, $_[1]) }
|
---|
99 | sub filtered_html { join("", @{$_[0]->{fhtml}}) }
|
---|
100 |
|
---|
101 | =head1 SEE ALSO
|
---|
102 |
|
---|
103 | L<HTML::Parser>
|
---|
104 |
|
---|
105 | =head1 COPYRIGHT
|
---|
106 |
|
---|
107 | Copyright 1997-1999 Gisle Aas.
|
---|
108 |
|
---|
109 | This library is free software; you can redistribute it and/or
|
---|
110 | modify it under the same terms as Perl itself.
|
---|
111 |
|
---|
112 | =cut
|
---|