Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: gs2-extensions/parallel-building/trunk/src/perllib/cpan/XML/XPath.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago
An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)
File size: 16.7 KB

Line
1	# $Id: XPath.pm 7909 2004-08-06 05:11:55Z mdewsnip $
2
3	package XML::XPath;
4
5	use strict;
6	use vars qw($VERSION $AUTOLOAD $revision);
7
8	$VERSION = '1.13';
9
10	$XML::XPath::Namespaces = 1;
11	$XML::XPath::Debug = 0;
12
13	use XML::XPath::XMLParser;
14	use XML::XPath::Parser;
15	use IO::File;
16
17	# For testing
18	#use Data::Dumper;
19	#$Data::Dumper::Indent = 1;
20
21	# Parameters for new()
22	my @options = qw(
23	filename
24	parser
25	xml
26	ioref
27	context
28	);
29
30	sub new {
31	my $proto = shift;
32	my $class = ref($proto) \|\| $proto;
33
34	my(%args);
35	# Try to figure out what the user passed
36	if ($#_ == 0) { # passed a scalar
37	my $string = $_[0];
38	if ($string =~ m{<.*?>}s) { # it's an XML string
39	$args{'xml'} = $string;
40	} elsif (ref($string)) { # read XML from file handle
41	$args{'ioref'} = $string;
42	} elsif ($string eq '-') { # read XML from stdin
43	$args{'ioref'} = IO::File->new($string);
44	} else { # read XML from a file
45	$args{'filename'} = $string;
46	}
47	} else { # passed a hash or hash reference
48	# just pass the parameters on to the XPath constructor
49	%args = ((ref($_[0]) eq "HASH") ? %{$_[0]} : @_);
50	}
51
52	if ($args{filename} && (!-e $args{filename} \|\| !-r $args{filename})) {
53	die "Cannot open file '$args{filename}'";
54	}
55	my %hash = map(( "_$_" => $args{$_} ), @options);
56	$hash{path_parser} = XML::XPath::Parser->new();
57	return bless \%hash, $class;
58	}
59
60	sub find {
61	my $self = shift;
62	my $path = shift;
63	my $context = shift;
64	die "No path to find" unless $path;
65
66	if (!defined $context) {
67	$context = $self->get_context;
68	}
69	if (!defined $context) {
70	# Still no context? Need to parse...
71	my $parser = XML::XPath::XMLParser->new(
72	filename => $self->get_filename,
73	xml => $self->get_xml,
74	ioref => $self->get_ioref,
75	parser => $self->get_parser,
76	);
77	$context = $parser->parse;
78	$self->set_context($context);
79	# warn "CONTEXT:\n", Data::Dumper->Dumpxs([$context], ['context']);
80	}
81
82	my $parsed_path = $self->{path_parser}->parse($path);
83	# warn "\n\nPATH: ", $parsed_path->as_string, "\n\n";
84
85	# warn "evaluating path\n";
86	return $parsed_path->evaluate($context);
87	}
88
89	# sub memsize {
90	# print STDERR @_, "\t";
91	# open(FH, '/proc/self/status');
92	# while(<FH>) {
93	# print STDERR $_ if /^VmSize/;
94	# }
95	# close FH;
96	# }
97	#
98	sub findnodes {
99	my $self = shift;
100	my ($path, $context) = @_;
101
102	my $results = $self->find($path, $context);
103
104	if ($results->isa('XML::XPath::NodeSet')) {
105	return wantarray ? $results->get_nodelist : $results;
106	# return $results->get_nodelist;
107	}
108
109	# warn("findnodes returned a ", ref($results), " object\n") if $XML::XPath::Debug;
110	return wantarray ? () : XML::XPath::NodeSet->new();
111	}
112
113	sub matches {
114	my $self = shift;
115	my ($node, $path, $context) = @_;
116
117	my @nodes = $self->findnodes($path, $context);
118
119	if (grep { "$node" eq "$_" } @nodes) {
120	return 1;
121	}
122	return;
123	}
124
125	sub findnodes_as_string {
126	my $self = shift;
127	my ($path, $context) = @_;
128
129	my $results = $self->find($path, $context);
130
131	if ($results->isa('XML::XPath::NodeSet')) {
132	return join('', map { $_->toString } $results->get_nodelist);
133	}
134	elsif ($results->isa('XML::XPath::Node')) {
135	return $results->toString;
136	}
137	else {
138	return XML::XPath::Node::XMLescape($results->value);
139	}
140	}
141
142	sub findvalue {
143	my $self = shift;
144	my ($path, $context) = @_;
145
146	my $results = $self->find($path, $context);
147
148	if ($results->isa('XML::XPath::NodeSet')) {
149	return $results->to_literal;
150	}
151
152	return $results;
153	}
154
155	sub exists
156	{
157	my $self = shift;
158	my ($path, $context) = @_;
159	$path = '/' if (!defined $path);
160	my @nodeset = $self->findnodes($path, $context);
161	return 1 if (scalar( @nodeset ));
162	return 0;
163	}
164
165	sub getNodeAsXML {
166	my $self = shift;
167	my $node_path = shift;
168	$node_path = '/' if (!defined $node_path);
169	if (ref($node_path)) {
170	return $node_path->as_string();
171	} else {
172	return $self->findnodes_as_string($node_path);
173	}
174	}
175
176	sub getNodeText {
177	my $self = shift;
178	my $node_path = shift;
179	if (ref($node_path)) {
180	return $node_path->string_value();
181	} else {
182	return $self->findvalue($node_path);
183	}
184	}
185
186	sub setNodeText {
187	my $self = shift;
188	my($node_path, $new_text) = @_;
189	my $nodeset = $self->findnodes($node_path);
190	return undef if (!defined $nodeset); # could not find node
191	my @nodes = $nodeset->get_nodelist;
192	if ($#nodes < 0) {
193	if ($node_path =~ m\|/@([^/]+)$\|) {
194	# attribute not found, so try to create it
195	my $parent_path = $`;
196	my $attr = $1;
197	$nodeset = $self->findnodes($parent_path);
198	return undef if (!defined $nodeset); # could not find node
199	foreach my $node ($nodeset->get_nodelist) {
200	my $newnode = XML::XPath::Node::Attribute->new($attr, $new_text);
201	return undef if (!defined $newnode); # could not create new node
202	$node->appendAttribute($newnode);
203	}
204	} else {
205	return undef; # could not find node
206	}
207	}
208	foreach my $node (@nodes) {
209	if ($node->getNodeType == XML::XPath::Node::ATTRIBUTE_NODE) {
210	$node->setNodeValue($new_text);
211	} else {
212	foreach my $delnode ($node->getChildNodes()) {
213	$node->removeChild($delnode);
214	}
215	my $newnode = XML::XPath::Node::Text->new($new_text);
216	return undef if (!defined $newnode); # could not create new node
217	$node->appendChild($newnode);
218	}
219	}
220	return 1;
221	}
222
223	sub createNode {
224	my $self = shift;
225	my($node_path) = @_;
226	my $path_steps = $self->{path_parser}->parse($node_path);
227	my @path_steps = ();
228	foreach my $step (@{$path_steps->get_lhs()}) {
229	my $string = $step->as_string();
230	push(@path_steps, $string) if (defined $string && $string ne "");
231	}
232	my $prev_node = undef;
233	my $nodeset = undef;
234	my $nodes = undef;
235	my $p = undef;
236	my $test_path = "";
237	# Start with the deepest node, working up the path (right to left),
238	# trying to find a node that exists.
239	for ($p = $#path_steps; $p >= 0; $p--) {
240	my $path = $path_steps[$p];
241	$test_path = "(/" . join("/", @path_steps[0..$p]) . ")";
242	$nodeset = $self->findnodes($test_path);
243	return undef if (!defined $nodeset); # error looking for node
244	$nodes = $nodeset->size;
245	return undef if ($nodes > 1); # too many paths - path not specific enough
246	if ($nodes == 1) { # found a node -- need to create nodes below it
247	$prev_node = $nodeset->get_node(1);
248	last;
249	}
250	}
251	if (!defined $prev_node) {
252	my @root_nodes = $self->findnodes('/')->get_nodelist();
253	$prev_node = $root_nodes[0];
254	}
255	# We found a node that exists, or we'll start at the root.
256	# Create all lower nodes working left to right along the path.
257	for ($p++ ; $p <= $#path_steps; $p++) {
258	my $path = $path_steps[$p];
259	my $newnode = undef;
260	my($axis,$name) = ($path =~ /^(.?)::(.)$/);
261	if ($axis =~ /^child$/i) {
262	$newnode = XML::XPath::Node::Element->new($name);
263	return undef if (!defined $newnode); # could not create new node
264	$prev_node->appendChild($newnode);
265	} elsif ($axis =~ /^attribute$/i) {
266	$newnode = XML::XPath::Node::Attribute->new($name, "");
267	return undef if (!defined $newnode); # could not create new node
268	$prev_node->appendAttribute($newnode);
269	}
270	$prev_node = $newnode;
271	}
272	return $prev_node;
273	}
274
275	sub get_filename {
276	my $self = shift;
277	$self->{_filename};
278	}
279
280	sub set_filename {
281	my $self = shift;
282	$self->{_filename} = shift;
283	}
284
285	sub get_parser {
286	my $self = shift;
287	$self->{_parser};
288	}
289
290	sub set_parser {
291	my $self = shift;
292	$self->{_parser} = shift;
293	}
294
295	sub get_xml {
296	my $self = shift;
297	$self->{_xml};
298	}
299
300	sub set_xml {
301	my $self = shift;
302	$self->{_xml} = shift;
303	}
304
305	sub get_ioref {
306	my $self = shift;
307	$self->{_ioref};
308	}
309
310	sub set_ioref {
311	my $self = shift;
312	$self->{_ioref} = shift;
313	}
314
315	sub get_context {
316	my $self = shift;
317	$self->{_context};
318	}
319
320	sub set_context {
321	my $self = shift;
322	$self->{_context} = shift;
323	}
324
325	sub cleanup {
326	my $self = shift;
327	if ($XML::XPath::SafeMode) {
328	my $context = $self->get_context;
329	return unless $context;
330	$context->dispose;
331	}
332	}
333
334	sub set_namespace {
335	my $self = shift;
336	my ($prefix, $expanded) = @_;
337	$self->{path_parser}->set_namespace($prefix, $expanded);
338	}
339
340	sub clear_namespaces {
341	my $self = shift;
342	$self->{path_parser}->clear_namespaces();
343	}
344
345	1;
346	__END__
347
348	=head1 NAME
349
350	XML::XPath - a set of modules for parsing and evaluating XPath statements
351
352	=head1 DESCRIPTION
353
354	This module aims to comply exactly to the XPath specification at
355	http://www.w3.org/TR/xpath and yet allow extensions to be added in the
356	form of functions. Modules such as XSLT and XPointer may need to do
357	this as they support functionality beyond XPath.
358
359	=head1 SYNOPSIS
360
361	use XML::XPath;
362	use XML::XPath::XMLParser;
363
364	my $xp = XML::XPath->new(filename => 'test.xhtml');
365
366	my $nodeset = $xp->find('/html/body/p'); # find all paragraphs
367
368	foreach my $node ($nodeset->get_nodelist) {
369	print "FOUND\n\n",
370	XML::XPath::XMLParser::as_string($node),
371	"\n\n";
372	}
373
374	=head1 DETAILS
375
376	There's an awful lot to all of this, so bear with it - if you stick it
377	out it should be worth it. Please get a good understanding of XPath
378	by reading the spec before asking me questions. All of the classes
379	and parts herein are named to be synonimous with the names in the
380	specification, so consult that if you don't understand why I'm doing
381	something in the code.
382
383	=head1 API
384
385	The API of XML::XPath itself is extremely simple to allow you to get
386	going almost immediately. The deeper API's are more complex, but you
387	shouldn't have to touch most of that.
388
389	=head2 new()
390
391	This constructor follows the often seen named parameter method call.
392	Parameters you can use are: filename, parser, xml, ioref and context.
393	The filename parameter specifies an XML file to parse. The xml
394	parameter specifies a string to parse, and the ioref parameter
395	specifies an ioref to parse. The context option allows you to
396	specify a context node. The context node has to be in the format
397	of a node as specified in L<XML::XPath::XMLParser>. The 4 parameters
398	filename, xml, ioref and context are mutually exclusive - you should
399	only specify one (if you specify anything other than context, the
400	context node is the root of your document).
401	The parser option allows you to pass in an already prepared
402	XML::Parser object, to save you having to create more than one
403	in your application (if, for example, you're doing more than just XPath).
404
405	my $xp = XML::XPath->new( context => $node );
406
407	It is very much recommended that you use only 1 XPath object throughout
408	the life of your application. This is because the object (and it's sub-objects)
409	maintain certain bits of state information that will be useful (such
410	as XPath variables) to later calls to find(). It's also a good idea because
411	you'll use less memory this way.
412
413	=head2 I<nodeset> = find($path, [$context])
414
415	The find function takes an XPath expression (a string) and returns either an
416	XML::XPath::NodeSet object containing the nodes it found (or empty if
417	no nodes matched the path), or one of XML::XPath::Literal (a string),
418	XML::XPath::Number, or XML::XPath::Boolean. It should always return
419	something - and you can use ->isa() to find out what it returned. If you
420	need to check how many nodes it found you should check $nodeset->size.
421	See L<XML::XPath::NodeSet>. An optional second parameter of a context
422	node allows you to use this method repeatedly, for example XSLT needs
423	to do this.
424
425	=head2 findnodes($path, [$context])
426
427	Returns a list of nodes found by $path, optionally in context $context.
428	In scalar context returns an XML::XPath::NodeSet object.
429
430	=head2 findnodes_as_string($path, [$context])
431
432	Returns the nodes found reproduced as XML. The result is not guaranteed
433	to be valid XML though.
434
435	=head2 findvalue($path, [$context])
436
437	Returns either a C<XML::XPath::Literal>, a C<XML::XPath::Boolean> or a
438	C<XML::XPath::Number> object. If the path returns a NodeSet,
439	$nodeset->to_literal is called automatically for you (and thus a
440	C<XML::XPath::Literal> is returned). Note that
441	for each of the objects stringification is overloaded, so you can just
442	print the value found, or manipulate it in the ways you would a normal
443	perl value (e.g. using regular expressions).
444
445	=head2 exists($path, [$context])
446
447	Returns true if the given path exists.
448
449	=head2 matches($node, $path, [$context])
450
451	Returns true if the node matches the path (optionally in context $context).
452
453	=head2 getNodeText($path)
454
455	Returns the text string for a particular XML node. Returns a string,
456	or undef if the node doesn't exist.
457
458	=head2 setNodeText($path, $text)
459
460	Sets the text string for a particular XML node. The node can be an
461	element or an attribute. If the node to be set is an attribute, and
462	the attribute node does not exist, it will be created automatically.
463
464	=head2 createNode($path)
465
466	Creates the node matching the path given. If part of the path given, or
467	all of the path do not exist, the necessary nodes will be created
468	automatically.
469
470	=head2 set_namespace($prefix, $uri)
471
472	Sets the namespace prefix mapping to the uri.
473
474	Normally in XML::XPath the prefixes in XPath node tests take their
475	context from the current node. This means that foo:bar will always
476	match an element <foo:bar> regardless of the namespace that the prefix
477	foo is mapped to (which might even change within the document, resulting
478	in unexpected results). In order to make prefixes in XPath node tests
479	actually map to a real URI, you need to enable that via a call
480	to the set_namespace method of your XML::XPath object.
481
482	=head2 clear_namespaces()
483
484	Clears all previously set namespace mappings.
485
486	=head2 $XML::XPath::Namespaces
487
488	Set this to 0 if you I<don't> want namespace processing to occur. This
489	will make everything a little (tiny) bit faster, but you'll suffer for it,
490	probably.
491
492	=head1 Node Object Model
493
494	See L<XML::XPath::Node>, L<XML::XPath::Node::Element>,
495	L<XML::XPath::Node::Text>, L<XML::XPath::Node::Comment>,
496	L<XML::XPath::Node::Attribute>, L<XML::XPath::Node::Namespace>,
497	and L<XML::XPath::Node::PI>.
498
499	=head1 On Garbage Collection
500
501	XPath nodes work in a special way that allows circular references, and
502	yet still lets Perl's reference counting garbage collector to clean up
503	the nodes after use. This should be totally transparent to the user,
504	with one caveat: B<If you free your tree before letting go of a sub-tree,
505	consider that playing with fire and you may get burned>. What does this
506	mean to the average user? Not much. Provided you don't free (or let go
507	out of scope) either the tree you passed to XML::XPath->new, or if you
508	didn't pass a tree, and passed a filename or IO-ref, then provided you
509	don't let the XML::XPath object go out of scope before you let results
510	of find() and its friends go out of scope, then you'll be fine. Even if
511	you B<do> let the tree go out of scope before results, you'll probably
512	still be fine. The only case where you may get stung is when the last
513	part of your path/query is either an ancestor or parent axis. In that
514	case the worst that will happen is you'll end up with a circular reference
515	that won't get cleared until interpreter destruction time. You can get
516	around that by explicitly calling $node->DESTROY on each of your result
517	nodes, if you really need to do that.
518
519	Mail me direct if that's not clear. Note that it's not doom and gloom. It's
520	by no means perfect, but the worst that will happen is a long running process
521	could leak memory. Most long running processes will therefore be able to
522	explicitly be careful not to free the tree (or XML::XPath object) before
523	freeing results. AxKit, an application that uses XML::XPath, does this and
524	I didn't have to make any changes to the code - it's already sensible
525	programming.
526
527	If you I<really> don't want all this to happen, then set the variable
528	$XML::XPath::SafeMode, and call $xp->cleanup() on the XML::XPath object
529	when you're finished, or $tree->dispose() if you have a tree instead.
530
531	=head1 Example
532
533	Please see the test files in t/ for examples on how to use XPath.
534
535	=head1 Support/Author
536
537	This module is copyright 2000 AxKit.com Ltd. This is free
538	software, and as such comes with NO WARRANTY. No dates are used in this
539	module. You may distribute this module under the terms of either the
540	Gnu GPL, or the Artistic License (the same terms as Perl itself).
541
542	For support, please subscribe to the Perl-XML mailing list at the URL
543	http://listserv.activestate.com/mailman/listinfo/perl-xml
544
545	Matt Sergeant, [email protected]
546
547	=head1 SEE ALSO
548
549	L<XML::XPath::Literal>, L<XML::XPath::Boolean>, L<XML::XPath::Number>,
550	L<XML::XPath::XMLParser>, L<XML::XPath::NodeSet>, L<XML::XPath::PerlSAX>,
551	L<XML::XPath::Builder>.
552
553	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: