1 | # $Id: Tree.pm,v 1.2 2003-07-31 07:54:51 matt Exp $
|
---|
2 |
|
---|
package XML::Parser::Style::Tree;

use strict;
use warnings;

# Flag "Tree" in XML::Parser's table of built-in styles.
# NOTE(review): presumably XML::Parser consults this table when resolving
# Style => 'Tree' to this package -- confirm against XML::Parser itself.
$XML::Parser::Built_In_Styles{Tree} = 1;
|
---|
5 |
|
---|
# Init handler: set up an empty tree on the parser object.
#   $_[0] - the parser object; its hash slots hold the build state.
# Afterwards Lists is an empty stack, and Tree and Curlist both refer to
# the same new, empty array.
sub Init {
    my ($parser) = @_;

    my $root = [];
    $parser->{Lists} = [];
    $parser->{Tree}  = $root;

    # Kept as the final statement so the sub still evaluates to the
    # current-list assignment, exactly as before.
    $parser->{Curlist} = $root;
}
|
---|
11 |
|
---|
# Start handler: open a new element node and descend into it.
#   $_[0]     - the parser object carrying Lists / Curlist.
#   $_[1]     - the element's tag name.
#   $_[2 ..]  - remaining arguments, collected as key/value pairs into the
#               element's attribute hash.
# The new node is an array whose first item is the attribute hash; it is
# appended to the current list as a (tag, node) pair and then becomes the
# current list. The previous current list is saved on the Lists stack.
sub Start {
    my ($parser, $name, @pairs) = @_;

    my %attributes = @pairs;
    my $node       = [ \%attributes ];

    my $parent = $parser->{Curlist};
    push @{ $parser->{Lists} }, $parent;
    push @{$parent}, $name, $node;

    $parser->{Curlist} = $node;
}
|
---|
20 |
|
---|
# End handler: leave the element that is being closed.
#   $_[0] - the parser object carrying Lists / Curlist.
#   $_[1] - the closing tag name; not needed here, so it is not unpacked.
# Pops the most recently saved list off the Lists stack and makes it the
# current list again.
sub End {
    my ($parser) = @_;

    my $stack = $parser->{Lists};
    $parser->{Curlist} = pop @{$stack};
}
|
---|
26 |
|
---|
# Char handler: record character data in the current list.
#   $_[0] - the parser object carrying Curlist.
#   $_[1] - the chunk of text to record.
# Text is stored as a (0, string) pair. When the last pair in the current
# list is already a text pair, the chunk is appended to that string rather
# than starting a new pair.
sub Char {
    my ($parser, $chunk) = @_;

    my $siblings = $parser->{Curlist};
    my $last     = $#{$siblings};

    # No text pair at the tail of the list: start a new one.
    return push @{$siblings}, 0, $chunk
        unless $last > 0 && $siblings->[ $last - 1 ] eq '0';

    # Tail is a text pair: extend its string in place.
    $siblings->[$last] .= $chunk;
}
|
---|
39 |
|
---|
# Final handler: discard the build-time bookkeeping and return the tree.
#   $_[0] - the parser object carrying Curlist / Lists / Tree.
# Returns whatever is stored under the Tree key; the Curlist and Lists
# keys are removed from the parser object.
sub Final {
    my ($parser) = @_;

    delete @{$parser}{qw(Curlist Lists)};

    return $parser->{Tree};
}
|
---|
46 |
|
---|
47 | 1;
|
---|
48 | __END__
|
---|
49 |
|
---|
50 | =head1 NAME
|
---|
51 |
|
---|
52 | XML::Parser::Style::Tree
|
---|
53 |
|
---|
54 | =head1 SYNOPSIS
|
---|
55 |
|
---|
56 | use XML::Parser;
|
---|
57 | my $p = XML::Parser->new(Style => 'Tree');
|
---|
58 | my $tree = $p->parsefile('foo.xml');
|
---|
59 |
|
---|
60 | =head1 DESCRIPTION
|
---|
61 |
|
---|
62 | This module implements XML::Parser's Tree style parser.
|
---|
63 |
|
---|
64 | When parsing a document, C<parse()> will return a parse tree for the
|
---|
65 | document. Each node in the tree
|
---|
66 | takes the form of a tag-content pair. Text nodes are represented with
|
---|
67 | a pseudo-tag of "0" and the string that is their content. For elements,
|
---|
68 | the content is an array reference. The first item in the array is a
|
---|
69 | (possibly empty) hash reference containing attributes. The remainder of
|
---|
70 | the array is a sequence of tag-content pairs representing the content
|
---|
71 | of the element.
|
---|
72 |
|
---|
73 | So for example the result of parsing:
|
---|
74 |
|
---|
75 | <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
|
---|
76 |
|
---|
77 | would be:
|
---|
78 | Tag Content
|
---|
79 | ==================================================================
|
---|
80 | [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]],
|
---|
81 | bar, [ {}, 0, "Howdy", ref, [{}]],
|
---|
82 | 0, "do"
|
---|
83 | ]
|
---|
84 | ]
|
---|
85 |
|
---|
86 | The root element "foo" has 3 children: a "head" element, a "bar"
|
---|
87 | element and the text "do". After the empty attribute hash, these are
|
---|
88 | represented in its contents by 3 tag-content pairs.
|
---|
89 |
|
---|
90 | =cut
|
---|