source: gs2-extensions/parallel-building/trunk/src/perllib/cpan/HTML/TokeParser/Simple/Token/Tag/End.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 2.7 KB
Line 
package HTML::TokeParser::Simple::Token::Tag::End;

use strict;
use warnings;

# Package globals.  Declared with `our` instead of the obsolete `use vars`
# pragma; they remain reachable under their fully qualified names.
our ( $VERSION, $REVISION );
$REVISION = '$Id: End.pm 13983 2007-03-15 01:32:44Z lh92 $';
$VERSION  = '1.0';
use base 'HTML::TokeParser::Simple::Token::Tag';

# Field-name => number table.  No sub in this file reads it.
# NOTE(review): presumably the positions of the tag name and the raw text
# inside an 'E' token array -- confirm before relying on or removing it.
my %TOKEN = (
    tag  => 1,
    text => 2
);

# in order to maintain the 'drop-in replacement' ability with HTML::TokeParser,
# we cannot alter the array refs.  Thus we must store instance data here.  Ugh.

# Side table of per-token data, keyed by the stringified token reference.
# Entries are created by _init() and removed again by DESTROY().
my %INSTANCE;
19
# Record this token's bookkeeping data in %INSTANCE and return the token.
#
# Two array layouts are handled:
#   * element 0 is the literal 'E': the tag name is taken from element 1 and
#     the offset is stored as 0;
#   * anything else: element 0 is taken as the tag name, a single leading '/'
#     is stripped from it, and the offset is stored as -1.
sub _init {
    my ($token) = @_;

    my ( $offset, $name );
    if ( 'E' eq $token->[0] ) {
        ( $offset, $name ) = ( 0, $token->[1] );
    }
    else {
        $offset = -1;
        $name   = $token->[0];
        $name =~ s/^\///;
    }

    # Slice assignment creates the record on first use and only touches the
    # two keys this method owns.
    @{ $INSTANCE{$token} }{qw( offset tag )} = ( $offset, $name );

    return $token;
}
34
# Return the offset stored for this token by _init() (0 or -1).
sub _get_offset {
    my ($token) = @_;
    return $INSTANCE{$token}{offset};
}
# Return the last element of the token array, which _set_text()/rewrite_tag()
# treat as the tag's text.
sub _get_text {
    my ($token) = @_;
    return $token->[-1];
}
37
# Return the tag name that _init() stored for this token (leading '/' already
# removed).
sub _get_tag {
    my ($token) = @_;
    my $record = $INSTANCE{$token};
    return $record->{tag};
}
42
# Destructor: drop this token's record from %INSTANCE so the side table does
# not keep data for tokens that no longer exist.
sub DESTROY {
    my ($token) = @_;
    delete $INSTANCE{$token};
}
44
# Rebuild the token's text from its tag name and store it via _set_text().
#
# The new text is '<', an optional '/', the tag name, an optional trailing
# slash and '>'.  The trailing slash (with one optional preceding whitespace
# character) is carried over from the current text when that text ends in
# '/>'.  Returns the token itself.
sub rewrite_tag {
    my ($token) = @_;

    # capture the final slash if the tag is self-closing
    my $slash_suffix = '';
    if ( $token->_get_text =~ m{(\s?/)>$} ) {
        $slash_suffix = $1;
    }

    my $slash_prefix = '';
    $slash_prefix = '/' if $token->is_end_tag;

    my $rebuilt = sprintf '<%s%s%s>', $slash_prefix, $token->get_tag,
        $slash_suffix;
    $token->_set_text($rebuilt);

    return $token;
}
56
# Deprecated alias for as_is(): emits a deprecation warning through
# Carp::carp and then hands control to as_is() with the same arguments.
sub return_text {
    require Carp;
    my $notice = 'return_text() is deprecated. Use as_is() instead';
    Carp::carp($notice);

    # goto &sub replaces this call frame, so as_is() sees the original @_.
    goto &as_is;
}
62
# Return the token's text exactly as _get_text() reports it.
sub as_is {
    my ($token) = @_;
    return $token->_get_text;
}
66
# Public accessor for the tag name; delegates to _get_tag().
sub get_tag {
    my ($token) = @_;
    return $token->_get_tag;
}
70
71# is_foo methods
72
# For this class is_tag() is the same question as is_end_tag(): every
# argument is forwarded unchanged and the result is returned as-is.
sub is_tag {
    my ( $token, @filter ) = @_;
    return $token->is_end_tag(@filter);
}
77
# Report whether this token is an end tag, optionally of a specific kind.
#
#   $tag  optional tag name (string) or compiled regex to match against.
#
# With no $tag -- undef or the empty string -- the answer is always 1.
# Otherwise the result of _match_tag($tag) is returned.
#
# The "was a tag supplied?" test uses defined/length rather than plain
# truthiness, so the string '0' is matched as a tag name instead of being
# mistaken for "no tag given".
sub is_end_tag {
    my ( $self, $tag ) = @_;

    return 1 unless defined $tag && length $tag;
    return $self->_match_tag($tag);
}
82
# Compare this token's tag name (from _get_tag) against $tag.
#
#   * A compiled regex (ref() reports 'Regexp') is applied to the tag name
#     with =~ and the match result is returned.
#   * Anything else is treated as a name: it is lower-cased, a single leading
#     '/' is removed, and the result is compared to the tag name with eq.
sub _match_tag {
    my ( $token, $wanted ) = @_;

    return $token->_get_tag =~ $wanted
        if 'Regexp' eq ref $wanted;

    my $name = lc $wanted;
    $name =~ s/^\///;
    return $token->_get_tag eq $name;
}
94
951;
96
97__END__
98
=head1 NAME

HTML::TokeParser::Simple::Token::Tag::End - Token.pm "end tag" class.

=head1 SYNOPSIS

    use HTML::TokeParser::Simple;
    my $p = HTML::TokeParser::Simple->new( $somefile );

    while ( my $token = $p->get_token ) {
        # This prints all text in an HTML doc (i.e., it strips the HTML)
        next unless $token->is_text;
        print $token->as_is;
    }

=head1 DESCRIPTION

This class does most of the heavy lifting for C<HTML::TokeParser::Simple>.  See
the C<HTML::TokeParser::Simple> docs for details.

=head1 OVERRIDDEN METHODS

=over 4

=item * as_is

=item * get_tag

=item * is_end_tag

=item * is_tag

=item * return_text

=item * rewrite_tag

=back

=cut
Note: See TracBrowser for help on using the repository browser.