source: trunk/gsdl/perllib/cpan/XML/XPath.pm@ 7909

Last change on this file since 7909 was 7909, checked in by mdewsnip, 20 years ago

CPAN module for processing XPath expressions.

  • Property svn:keywords set to Author Date Id Revision
File size: 16.7 KB
Line 
1# $Id: XPath.pm 7909 2004-08-06 05:11:55Z mdewsnip $
2
3package XML::XPath;
4
5use strict;
6use vars qw($VERSION $AUTOLOAD $revision);
7
8$VERSION = '1.13';
9
10$XML::XPath::Namespaces = 1;
11$XML::XPath::Debug = 0;
12
13use XML::XPath::XMLParser;
14use XML::XPath::Parser;
15use IO::File;
16
17# For testing
18#use Data::Dumper;
19#$Data::Dumper::Indent = 1;
20
21# Parameters for new()
22my @options = qw(
23 filename
24 parser
25 xml
26 ioref
27 context
28 );
29
30sub new {
31 my $proto = shift;
32 my $class = ref($proto) || $proto;
33
34 my(%args);
35 # Try to figure out what the user passed
36 if ($#_ == 0) { # passed a scalar
37 my $string = $_[0];
38 if ($string =~ m{<.*?>}s) { # it's an XML string
39 $args{'xml'} = $string;
40 } elsif (ref($string)) { # read XML from file handle
41 $args{'ioref'} = $string;
42 } elsif ($string eq '-') { # read XML from stdin
43 $args{'ioref'} = IO::File->new($string);
44 } else { # read XML from a file
45 $args{'filename'} = $string;
46 }
47 } else { # passed a hash or hash reference
48 # just pass the parameters on to the XPath constructor
49 %args = ((ref($_[0]) eq "HASH") ? %{$_[0]} : @_);
50 }
51
52 if ($args{filename} && (!-e $args{filename} || !-r $args{filename})) {
53 die "Cannot open file '$args{filename}'";
54 }
55 my %hash = map(( "_$_" => $args{$_} ), @options);
56 $hash{path_parser} = XML::XPath::Parser->new();
57 return bless \%hash, $class;
58}
59
60sub find {
61 my $self = shift;
62 my $path = shift;
63 my $context = shift;
64 die "No path to find" unless $path;
65
66 if (!defined $context) {
67 $context = $self->get_context;
68 }
69 if (!defined $context) {
70 # Still no context? Need to parse...
71 my $parser = XML::XPath::XMLParser->new(
72 filename => $self->get_filename,
73 xml => $self->get_xml,
74 ioref => $self->get_ioref,
75 parser => $self->get_parser,
76 );
77 $context = $parser->parse;
78 $self->set_context($context);
79# warn "CONTEXT:\n", Data::Dumper->Dumpxs([$context], ['context']);
80 }
81
82 my $parsed_path = $self->{path_parser}->parse($path);
83# warn "\n\nPATH: ", $parsed_path->as_string, "\n\n";
84
85# warn "evaluating path\n";
86 return $parsed_path->evaluate($context);
87}
88
89# sub memsize {
90# print STDERR @_, "\t";
91# open(FH, '/proc/self/status');
92# while(<FH>) {
93# print STDERR $_ if /^VmSize/;
94# }
95# close FH;
96# }
97#
98sub findnodes {
99 my $self = shift;
100 my ($path, $context) = @_;
101
102 my $results = $self->find($path, $context);
103
104 if ($results->isa('XML::XPath::NodeSet')) {
105 return wantarray ? $results->get_nodelist : $results;
106# return $results->get_nodelist;
107 }
108
109# warn("findnodes returned a ", ref($results), " object\n") if $XML::XPath::Debug;
110 return wantarray ? () : XML::XPath::NodeSet->new();
111}
112
113sub matches {
114 my $self = shift;
115 my ($node, $path, $context) = @_;
116
117 my @nodes = $self->findnodes($path, $context);
118
119 if (grep { "$node" eq "$_" } @nodes) {
120 return 1;
121 }
122 return;
123}
124
125sub findnodes_as_string {
126 my $self = shift;
127 my ($path, $context) = @_;
128
129 my $results = $self->find($path, $context);
130
131 if ($results->isa('XML::XPath::NodeSet')) {
132 return join('', map { $_->toString } $results->get_nodelist);
133 }
134 elsif ($results->isa('XML::XPath::Node')) {
135 return $results->toString;
136 }
137 else {
138 return XML::XPath::Node::XMLescape($results->value);
139 }
140}
141
142sub findvalue {
143 my $self = shift;
144 my ($path, $context) = @_;
145
146 my $results = $self->find($path, $context);
147
148 if ($results->isa('XML::XPath::NodeSet')) {
149 return $results->to_literal;
150 }
151
152 return $results;
153}
154
155sub exists
156{
157 my $self = shift;
158 my ($path, $context) = @_;
159 $path = '/' if (!defined $path);
160 my @nodeset = $self->findnodes($path, $context);
161 return 1 if (scalar( @nodeset ));
162 return 0;
163}
164
165sub getNodeAsXML {
166 my $self = shift;
167 my $node_path = shift;
168 $node_path = '/' if (!defined $node_path);
169 if (ref($node_path)) {
170 return $node_path->as_string();
171 } else {
172 return $self->findnodes_as_string($node_path);
173 }
174}
175
176sub getNodeText {
177 my $self = shift;
178 my $node_path = shift;
179 if (ref($node_path)) {
180 return $node_path->string_value();
181 } else {
182 return $self->findvalue($node_path);
183 }
184}
185
186sub setNodeText {
187 my $self = shift;
188 my($node_path, $new_text) = @_;
189 my $nodeset = $self->findnodes($node_path);
190 return undef if (!defined $nodeset); # could not find node
191 my @nodes = $nodeset->get_nodelist;
192 if ($#nodes < 0) {
193 if ($node_path =~ m|/@([^/]+)$|) {
194 # attribute not found, so try to create it
195 my $parent_path = $`;
196 my $attr = $1;
197 $nodeset = $self->findnodes($parent_path);
198 return undef if (!defined $nodeset); # could not find node
199 foreach my $node ($nodeset->get_nodelist) {
200 my $newnode = XML::XPath::Node::Attribute->new($attr, $new_text);
201 return undef if (!defined $newnode); # could not create new node
202 $node->appendAttribute($newnode);
203 }
204 } else {
205 return undef; # could not find node
206 }
207 }
208 foreach my $node (@nodes) {
209 if ($node->getNodeType == XML::XPath::Node::ATTRIBUTE_NODE) {
210 $node->setNodeValue($new_text);
211 } else {
212 foreach my $delnode ($node->getChildNodes()) {
213 $node->removeChild($delnode);
214 }
215 my $newnode = XML::XPath::Node::Text->new($new_text);
216 return undef if (!defined $newnode); # could not create new node
217 $node->appendChild($newnode);
218 }
219 }
220 return 1;
221}
222
223sub createNode {
224 my $self = shift;
225 my($node_path) = @_;
226 my $path_steps = $self->{path_parser}->parse($node_path);
227 my @path_steps = ();
228 foreach my $step (@{$path_steps->get_lhs()}) {
229 my $string = $step->as_string();
230 push(@path_steps, $string) if (defined $string && $string ne "");
231 }
232 my $prev_node = undef;
233 my $nodeset = undef;
234 my $nodes = undef;
235 my $p = undef;
236 my $test_path = "";
237 # Start with the deepest node, working up the path (right to left),
238 # trying to find a node that exists.
239 for ($p = $#path_steps; $p >= 0; $p--) {
240 my $path = $path_steps[$p];
241 $test_path = "(/" . join("/", @path_steps[0..$p]) . ")";
242 $nodeset = $self->findnodes($test_path);
243 return undef if (!defined $nodeset); # error looking for node
244 $nodes = $nodeset->size;
245 return undef if ($nodes > 1); # too many paths - path not specific enough
246 if ($nodes == 1) { # found a node -- need to create nodes below it
247 $prev_node = $nodeset->get_node(1);
248 last;
249 }
250 }
251 if (!defined $prev_node) {
252 my @root_nodes = $self->findnodes('/')->get_nodelist();
253 $prev_node = $root_nodes[0];
254 }
255 # We found a node that exists, or we'll start at the root.
256 # Create all lower nodes working left to right along the path.
257 for ($p++ ; $p <= $#path_steps; $p++) {
258 my $path = $path_steps[$p];
259 my $newnode = undef;
260 my($axis,$name) = ($path =~ /^(.*?)::(.*)$/);
261 if ($axis =~ /^child$/i) {
262 $newnode = XML::XPath::Node::Element->new($name);
263 return undef if (!defined $newnode); # could not create new node
264 $prev_node->appendChild($newnode);
265 } elsif ($axis =~ /^attribute$/i) {
266 $newnode = XML::XPath::Node::Attribute->new($name, "");
267 return undef if (!defined $newnode); # could not create new node
268 $prev_node->appendAttribute($newnode);
269 }
270 $prev_node = $newnode;
271 }
272 return $prev_node;
273}
274
275sub get_filename {
276 my $self = shift;
277 $self->{_filename};
278}
279
280sub set_filename {
281 my $self = shift;
282 $self->{_filename} = shift;
283}
284
285sub get_parser {
286 my $self = shift;
287 $self->{_parser};
288}
289
290sub set_parser {
291 my $self = shift;
292 $self->{_parser} = shift;
293}
294
295sub get_xml {
296 my $self = shift;
297 $self->{_xml};
298}
299
300sub set_xml {
301 my $self = shift;
302 $self->{_xml} = shift;
303}
304
305sub get_ioref {
306 my $self = shift;
307 $self->{_ioref};
308}
309
310sub set_ioref {
311 my $self = shift;
312 $self->{_ioref} = shift;
313}
314
315sub get_context {
316 my $self = shift;
317 $self->{_context};
318}
319
320sub set_context {
321 my $self = shift;
322 $self->{_context} = shift;
323}
324
325sub cleanup {
326 my $self = shift;
327 if ($XML::XPath::SafeMode) {
328 my $context = $self->get_context;
329 return unless $context;
330 $context->dispose;
331 }
332}
333
334sub set_namespace {
335 my $self = shift;
336 my ($prefix, $expanded) = @_;
337 $self->{path_parser}->set_namespace($prefix, $expanded);
338}
339
340sub clear_namespaces {
341 my $self = shift;
342 $self->{path_parser}->clear_namespaces();
343}
344
3451;
346__END__
347
348=head1 NAME
349
350XML::XPath - a set of modules for parsing and evaluating XPath statements
351
352=head1 DESCRIPTION
353
354This module aims to comply exactly to the XPath specification at
355http://www.w3.org/TR/xpath and yet allow extensions to be added in the
356form of functions. Modules such as XSLT and XPointer may need to do
357this as they support functionality beyond XPath.
358
359=head1 SYNOPSIS
360
361 use XML::XPath;
362 use XML::XPath::XMLParser;
363
364 my $xp = XML::XPath->new(filename => 'test.xhtml');
365
366 my $nodeset = $xp->find('/html/body/p'); # find all paragraphs
367
368 foreach my $node ($nodeset->get_nodelist) {
369 print "FOUND\n\n",
370 XML::XPath::XMLParser::as_string($node),
371 "\n\n";
372 }
373
374=head1 DETAILS
375
376There's an awful lot to all of this, so bear with it - if you stick it
377out it should be worth it. Please get a good understanding of XPath
378by reading the spec before asking me questions. All of the classes
379and parts herein are named to be synonimous with the names in the
380specification, so consult that if you don't understand why I'm doing
381something in the code.
382
383=head1 API
384
385The API of XML::XPath itself is extremely simple to allow you to get
386going almost immediately. The deeper API's are more complex, but you
387shouldn't have to touch most of that.
388
389=head2 new()
390
391This constructor follows the often seen named parameter method call.
392Parameters you can use are: filename, parser, xml, ioref and context.
393The filename parameter specifies an XML file to parse. The xml
394parameter specifies a string to parse, and the ioref parameter
395specifies an ioref to parse. The context option allows you to
396specify a context node. The context node has to be in the format
397of a node as specified in L<XML::XPath::XMLParser>. The 4 parameters
398filename, xml, ioref and context are mutually exclusive - you should
399only specify one (if you specify anything other than context, the
400context node is the root of your document).
401The parser option allows you to pass in an already prepared
402XML::Parser object, to save you having to create more than one
403in your application (if, for example, you're doing more than just XPath).
404
405 my $xp = XML::XPath->new( context => $node );
406
407It is very much recommended that you use only 1 XPath object throughout
408the life of your application. This is because the object (and it's sub-objects)
409maintain certain bits of state information that will be useful (such
410as XPath variables) to later calls to find(). It's also a good idea because
411you'll use less memory this way.
412
413=head2 I<nodeset> = find($path, [$context])
414
415The find function takes an XPath expression (a string) and returns either an
416XML::XPath::NodeSet object containing the nodes it found (or empty if
417no nodes matched the path), or one of XML::XPath::Literal (a string),
418XML::XPath::Number, or XML::XPath::Boolean. It should always return
419something - and you can use ->isa() to find out what it returned. If you
420need to check how many nodes it found you should check $nodeset->size.
421See L<XML::XPath::NodeSet>. An optional second parameter of a context
422node allows you to use this method repeatedly, for example XSLT needs
423to do this.
424
425=head2 findnodes($path, [$context])
426
427Returns a list of nodes found by $path, optionally in context $context.
428In scalar context returns an XML::XPath::NodeSet object.
429
430=head2 findnodes_as_string($path, [$context])
431
432Returns the nodes found reproduced as XML. The result is not guaranteed
433to be valid XML though.
434
435=head2 findvalue($path, [$context])
436
437Returns either a C<XML::XPath::Literal>, a C<XML::XPath::Boolean> or a
438C<XML::XPath::Number> object. If the path returns a NodeSet,
439$nodeset->to_literal is called automatically for you (and thus a
440C<XML::XPath::Literal> is returned). Note that
441for each of the objects stringification is overloaded, so you can just
442print the value found, or manipulate it in the ways you would a normal
443perl value (e.g. using regular expressions).
444
445=head2 exists($path, [$context])
446
447Returns true if the given path exists.
448
449=head2 matches($node, $path, [$context])
450
451Returns true if the node matches the path (optionally in context $context).
452
453=head2 getNodeText($path)
454
455Returns the text string for a particular XML node. Returns a string,
456or undef if the node doesn't exist.
457
458=head2 setNodeText($path, $text)
459
460Sets the text string for a particular XML node. The node can be an
461element or an attribute. If the node to be set is an attribute, and
462the attribute node does not exist, it will be created automatically.
463
464=head2 createNode($path)
465
466Creates the node matching the path given. If part of the path given, or
467all of the path do not exist, the necessary nodes will be created
468automatically.
469
470=head2 set_namespace($prefix, $uri)
471
472Sets the namespace prefix mapping to the uri.
473
474Normally in XML::XPath the prefixes in XPath node tests take their
475context from the current node. This means that foo:bar will always
476match an element <foo:bar> regardless of the namespace that the prefix
477foo is mapped to (which might even change within the document, resulting
478in unexpected results). In order to make prefixes in XPath node tests
479actually map to a real URI, you need to enable that via a call
480to the set_namespace method of your XML::XPath object.
481
482=head2 clear_namespaces()
483
484Clears all previously set namespace mappings.
485
486=head2 $XML::XPath::Namespaces
487
488Set this to 0 if you I<don't> want namespace processing to occur. This
489will make everything a little (tiny) bit faster, but you'll suffer for it,
490probably.
491
492=head1 Node Object Model
493
494See L<XML::XPath::Node>, L<XML::XPath::Node::Element>,
495L<XML::XPath::Node::Text>, L<XML::XPath::Node::Comment>,
496L<XML::XPath::Node::Attribute>, L<XML::XPath::Node::Namespace>,
497and L<XML::XPath::Node::PI>.
498
499=head1 On Garbage Collection
500
501XPath nodes work in a special way that allows circular references, and
502yet still lets Perl's reference counting garbage collector to clean up
503the nodes after use. This should be totally transparent to the user,
504with one caveat: B<If you free your tree before letting go of a sub-tree,
505consider that playing with fire and you may get burned>. What does this
506mean to the average user? Not much. Provided you don't free (or let go
507out of scope) either the tree you passed to XML::XPath->new, or if you
508didn't pass a tree, and passed a filename or IO-ref, then provided you
509don't let the XML::XPath object go out of scope before you let results
510of find() and its friends go out of scope, then you'll be fine. Even if
511you B<do> let the tree go out of scope before results, you'll probably
512still be fine. The only case where you may get stung is when the last
513part of your path/query is either an ancestor or parent axis. In that
514case the worst that will happen is you'll end up with a circular reference
515that won't get cleared until interpreter destruction time. You can get
516around that by explicitly calling $node->DESTROY on each of your result
517nodes, if you really need to do that.
518
519Mail me direct if that's not clear. Note that it's not doom and gloom. It's
520by no means perfect, but the worst that will happen is a long running process
521could leak memory. Most long running processes will therefore be able to
522explicitly be careful not to free the tree (or XML::XPath object) before
523freeing results. AxKit, an application that uses XML::XPath, does this and
524I didn't have to make any changes to the code - it's already sensible
525programming.
526
527If you I<really> don't want all this to happen, then set the variable
528$XML::XPath::SafeMode, and call $xp->cleanup() on the XML::XPath object
529when you're finished, or $tree->dispose() if you have a tree instead.
530
531=head1 Example
532
533Please see the test files in t/ for examples on how to use XPath.
534
535=head1 Support/Author
536
537This module is copyright 2000 AxKit.com Ltd. This is free
538software, and as such comes with NO WARRANTY. No dates are used in this
539module. You may distribute this module under the terms of either the
540Gnu GPL, or the Artistic License (the same terms as Perl itself).
541
542For support, please subscribe to the Perl-XML mailing list at the URL
543http://listserv.activestate.com/mailman/listinfo/perl-xml
544
545Matt Sergeant, [email protected]
546
547=head1 SEE ALSO
548
549L<XML::XPath::Literal>, L<XML::XPath::Boolean>, L<XML::XPath::Number>,
550L<XML::XPath::XMLParser>, L<XML::XPath::NodeSet>, L<XML::XPath::PerlSAX>,
551L<XML::XPath::Builder>.
552
553=cut
Note: See TracBrowser for help on using the repository browser.