[7909] | 1 | # $Id: Parser.pm 7909 2004-08-06 05:11:55Z mdewsnip $
|
---|
| 2 |
|
---|
| 3 | package XML::XPath::Parser;
|
---|
| 4 |
|
---|
| 5 | use strict;
|
---|
| 6 | use vars qw/
|
---|
| 7 | $NCName
|
---|
| 8 | $QName
|
---|
| 9 | $NCWild
|
---|
| 10 | $QNWild
|
---|
| 11 | $NUMBER_RE
|
---|
| 12 | $NODE_TYPE
|
---|
| 13 | $AXIS_NAME
|
---|
| 14 | %AXES
|
---|
| 15 | $LITERAL
|
---|
| 16 | %CACHE/;
|
---|
| 17 |
|
---|
| 18 | use XML::XPath::XMLParser;
|
---|
| 19 | use XML::XPath::Step;
|
---|
| 20 | use XML::XPath::Expr;
|
---|
| 21 | use XML::XPath::Function;
|
---|
| 22 | use XML::XPath::LocationPath;
|
---|
| 23 | use XML::XPath::Variable;
|
---|
| 24 | use XML::XPath::Literal;
|
---|
| 25 | use XML::XPath::Number;
|
---|
| 26 | use XML::XPath::NodeSet;
|
---|
| 27 |
|
---|
| 28 | # Axis name to principal node type mapping
|
---|
| 29 | %AXES = (
|
---|
| 30 | 'ancestor' => 'element',
|
---|
| 31 | 'ancestor-or-self' => 'element',
|
---|
| 32 | 'attribute' => 'attribute',
|
---|
| 33 | 'namespace' => 'namespace',
|
---|
| 34 | 'child' => 'element',
|
---|
| 35 | 'descendant' => 'element',
|
---|
| 36 | 'descendant-or-self' => 'element',
|
---|
| 37 | 'following' => 'element',
|
---|
| 38 | 'following-sibling' => 'element',
|
---|
| 39 | 'parent' => 'element',
|
---|
| 40 | 'preceding' => 'element',
|
---|
| 41 | 'preceding-sibling' => 'element',
|
---|
| 42 | 'self' => 'element',
|
---|
| 43 | );
|
---|
| 44 |
|
---|
| 45 | $NCName = '([A-Za-z_][\w\\.\\-]*)';
|
---|
| 46 | $QName = "($NCName:)?$NCName";
|
---|
| 47 | $NCWild = "${NCName}:\\*";
|
---|
| 48 | $QNWild = "\\*";
|
---|
| 49 | $NODE_TYPE = '((text|comment|processing-instruction|node)\\(\\))';
|
---|
| 50 | $AXIS_NAME = '(' . join('|', keys %AXES) . ')::';
|
---|
| 51 | $NUMBER_RE = '\d+(\\.\d*)?|\\.\d+';
|
---|
| 52 | $LITERAL = '\\"[^\\"]*\\"|\\\'[^\\\']*\\\'';
|
---|
| 53 |
|
---|
| 54 | sub new {
|
---|
| 55 | my $class = shift;
|
---|
| 56 | my $self = bless {}, $class;
|
---|
| 57 | debug("New Parser being created.\n");
|
---|
| 58 | $self->{context_set} = XML::XPath::NodeSet->new();
|
---|
| 59 | $self->{context_pos} = undef; # 1 based position in array context
|
---|
| 60 | $self->{context_size} = 0; # total size of context
|
---|
| 61 | $self->clear_namespaces();
|
---|
| 62 | $self->{vars} = {};
|
---|
| 63 | $self->{direction} = 'forward';
|
---|
| 64 | $self->{cache} = {};
|
---|
| 65 | return $self;
|
---|
| 66 | }
|
---|
| 67 |
|
---|
| 68 | sub get_var {
|
---|
| 69 | my $self = shift;
|
---|
| 70 | my $var = shift;
|
---|
| 71 | $self->{vars}->{$var};
|
---|
| 72 | }
|
---|
| 73 |
|
---|
| 74 | sub set_var {
|
---|
| 75 | my $self = shift;
|
---|
| 76 | my $var = shift;
|
---|
| 77 | my $val = shift;
|
---|
| 78 | $self->{vars}->{$var} = $val;
|
---|
| 79 | }
|
---|
| 80 |
|
---|
| 81 | sub set_namespace {
|
---|
| 82 | my $self = shift;
|
---|
| 83 | my ($prefix, $expanded) = @_;
|
---|
| 84 | $self->{namespaces}{$prefix} = $expanded;
|
---|
| 85 | }
|
---|
| 86 |
|
---|
| 87 | sub clear_namespaces {
|
---|
| 88 | my $self = shift;
|
---|
| 89 | $self->{namespaces} = {};
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | sub get_namespace {
|
---|
| 93 | my $self = shift;
|
---|
| 94 | my ($prefix, $node) = @_;
|
---|
| 95 | if (my $ns = $self->{namespaces}{$prefix}) {
|
---|
| 96 | return $ns;
|
---|
| 97 | }
|
---|
| 98 | if (my $nsnode = $node->getNamespace($prefix)) {
|
---|
| 99 | return $nsnode->getValue();
|
---|
| 100 | }
|
---|
| 101 | }
|
---|
| 102 |
|
---|
| 103 | sub get_context_set { $_[0]->{context_set}; }
|
---|
| 104 | sub set_context_set { $_[0]->{context_set} = $_[1]; }
|
---|
| 105 | sub get_context_pos { $_[0]->{context_pos}; }
|
---|
| 106 | sub set_context_pos { $_[0]->{context_pos} = $_[1]; }
|
---|
| 107 | sub get_context_size { $_[0]->{context_set}->size; }
|
---|
| 108 | sub get_context_node { $_[0]->{context_set}->get_node($_[0]->{context_pos}); }
|
---|
| 109 |
|
---|
| 110 | sub my_sub {
|
---|
| 111 | return (caller(1))[3];
|
---|
| 112 | }
|
---|
| 113 |
|
---|
| 114 | sub parse {
|
---|
| 115 | my $self = shift;
|
---|
| 116 | my $path = shift;
|
---|
| 117 | if ($CACHE{$path}) {
|
---|
| 118 | return $CACHE{$path};
|
---|
| 119 | }
|
---|
| 120 | my $tokens = $self->tokenize($path);
|
---|
| 121 |
|
---|
| 122 | $self->{_tokpos} = 0;
|
---|
| 123 | my $tree = $self->analyze($tokens);
|
---|
| 124 |
|
---|
| 125 | if ($self->{_tokpos} < scalar(@$tokens)) {
|
---|
| 126 | # didn't manage to parse entire expression - throw an exception
|
---|
| 127 | die "Parse of expression $path failed - junk after end of expression: $tokens->[$self->{_tokpos}]";
|
---|
| 128 | }
|
---|
| 129 |
|
---|
| 130 | $CACHE{$path} = $tree;
|
---|
| 131 |
|
---|
| 132 | debug("PARSED Expr to:\n", $tree->as_string, "\n") if $XML::XPath::Debug;
|
---|
| 133 |
|
---|
| 134 | return $tree;
|
---|
| 135 | }
|
---|
| 136 |
|
---|
| 137 | sub tokenize {
|
---|
| 138 | my $self = shift;
|
---|
| 139 | my $path = shift;
|
---|
| 140 | study $path;
|
---|
| 141 |
|
---|
| 142 | my @tokens;
|
---|
| 143 |
|
---|
| 144 | debug("Parsing: $path\n");
|
---|
| 145 |
|
---|
| 146 | # Bug: We don't allow "'@' NodeType" which is in the grammar, but I think is just plain stupid.
|
---|
| 147 |
|
---|
| 148 | while($path =~ m/\G
|
---|
| 149 | \s* # ignore all whitespace
|
---|
| 150 | ( # tokens
|
---|
| 151 | $LITERAL|
|
---|
| 152 | $NUMBER_RE| # Match digits
|
---|
| 153 | \.\.| # match parent
|
---|
| 154 | \.| # match current
|
---|
| 155 | ($AXIS_NAME)?$NODE_TYPE| # match tests
|
---|
| 156 | processing-instruction|
|
---|
| 157 | \@($NCWild|$QName|$QNWild)| # match attrib
|
---|
| 158 | \$$QName| # match variable reference
|
---|
| 159 | ($AXIS_NAME)?($NCWild|$QName|$QNWild)| # match NCName,NodeType,Axis::Test
|
---|
| 160 | \!=|<=|\-|>=|\/\/|and|or|mod|div| # multi-char seps
|
---|
| 161 | [,\+=\|<>\/\(\[\]\)]| # single char seps
|
---|
| 162 | (?<!(\@|\(|\[))\*| # multiply operator rules (see xpath spec)
|
---|
| 163 | (?<!::)\*|
|
---|
| 164 | $ # match end of query
|
---|
| 165 | )
|
---|
| 166 | \s* # ignore all whitespace
|
---|
| 167 | /gcxso) {
|
---|
| 168 |
|
---|
| 169 | my ($token) = ($1);
|
---|
| 170 |
|
---|
| 171 | if (length($token)) {
|
---|
| 172 | debug("TOKEN: $token\n");
|
---|
| 173 | push @tokens, $token;
|
---|
| 174 | }
|
---|
| 175 |
|
---|
| 176 | }
|
---|
| 177 |
|
---|
| 178 | if (pos($path) < length($path)) {
|
---|
| 179 | my $marker = ("." x (pos($path)-1));
|
---|
| 180 | $path = substr($path, 0, pos($path) + 8) . "...";
|
---|
| 181 | $path =~ s/\n/ /g;
|
---|
| 182 | $path =~ s/\t/ /g;
|
---|
| 183 | die "Query:\n",
|
---|
| 184 | "$path\n",
|
---|
| 185 | $marker, "^^^\n",
|
---|
| 186 | "Invalid query somewhere around here (I think)\n";
|
---|
| 187 | }
|
---|
| 188 |
|
---|
| 189 | return \@tokens;
|
---|
| 190 | }
|
---|
| 191 |
|
---|
| 192 | sub analyze {
|
---|
| 193 | my $self = shift;
|
---|
| 194 | my $tokens = shift;
|
---|
| 195 | # lexical analysis
|
---|
| 196 |
|
---|
| 197 | return Expr($self, $tokens);
|
---|
| 198 | }
|
---|
| 199 |
|
---|
| 200 | sub match {
|
---|
| 201 | my ($self, $tokens, $match, $fatal) = @_;
|
---|
| 202 |
|
---|
| 203 | $self->{_curr_match} = '';
|
---|
| 204 | return 0 unless $self->{_tokpos} < @$tokens;
|
---|
| 205 |
|
---|
| 206 | local $^W;
|
---|
| 207 |
|
---|
| 208 | # debug ("match: $match\n");
|
---|
| 209 |
|
---|
| 210 | if ($tokens->[$self->{_tokpos}] =~ /^$match$/) {
|
---|
| 211 | $self->{_curr_match} = $tokens->[$self->{_tokpos}];
|
---|
| 212 | $self->{_tokpos}++;
|
---|
| 213 | return 1;
|
---|
| 214 | }
|
---|
| 215 | else {
|
---|
| 216 | if ($fatal) {
|
---|
| 217 | die "Invalid token: ", $tokens->[$self->{_tokpos}], "\n";
|
---|
| 218 | }
|
---|
| 219 | else {
|
---|
| 220 | return 0;
|
---|
| 221 | }
|
---|
| 222 | }
|
---|
| 223 | }
|
---|
| 224 |
|
---|
| 225 | sub Expr {
|
---|
| 226 | my ($self, $tokens) = @_;
|
---|
| 227 |
|
---|
| 228 | debug("in SUB\n");
|
---|
| 229 |
|
---|
| 230 | return OrExpr($self, $tokens);
|
---|
| 231 | }
|
---|
| 232 |
|
---|
| 233 | sub OrExpr {
|
---|
| 234 | my ($self, $tokens) = @_;
|
---|
| 235 |
|
---|
| 236 | debug("in SUB\n");
|
---|
| 237 |
|
---|
| 238 | my $expr = AndExpr($self, $tokens);
|
---|
| 239 | while (match($self, $tokens, 'or')) {
|
---|
| 240 | my $or_expr = XML::XPath::Expr->new($self);
|
---|
| 241 | $or_expr->set_lhs($expr);
|
---|
| 242 | $or_expr->set_op('or');
|
---|
| 243 |
|
---|
| 244 | my $rhs = AndExpr($self, $tokens);
|
---|
| 245 |
|
---|
| 246 | $or_expr->set_rhs($rhs);
|
---|
| 247 | $expr = $or_expr;
|
---|
| 248 | }
|
---|
| 249 |
|
---|
| 250 | return $expr;
|
---|
| 251 | }
|
---|
| 252 |
|
---|
| 253 | sub AndExpr {
|
---|
| 254 | my ($self, $tokens) = @_;
|
---|
| 255 |
|
---|
| 256 | debug("in SUB\n");
|
---|
| 257 |
|
---|
| 258 | my $expr = EqualityExpr($self, $tokens);
|
---|
| 259 | while (match($self, $tokens, 'and')) {
|
---|
| 260 | my $and_expr = XML::XPath::Expr->new($self);
|
---|
| 261 | $and_expr->set_lhs($expr);
|
---|
| 262 | $and_expr->set_op('and');
|
---|
| 263 |
|
---|
| 264 | my $rhs = EqualityExpr($self, $tokens);
|
---|
| 265 |
|
---|
| 266 | $and_expr->set_rhs($rhs);
|
---|
| 267 | $expr = $and_expr;
|
---|
| 268 | }
|
---|
| 269 |
|
---|
| 270 | return $expr;
|
---|
| 271 | }
|
---|
| 272 |
|
---|
| 273 | sub EqualityExpr {
|
---|
| 274 | my ($self, $tokens) = @_;
|
---|
| 275 |
|
---|
| 276 | debug("in SUB\n");
|
---|
| 277 |
|
---|
| 278 | my $expr = RelationalExpr($self, $tokens);
|
---|
| 279 | while (match($self, $tokens, '!?=')) {
|
---|
| 280 | my $eq_expr = XML::XPath::Expr->new($self);
|
---|
| 281 | $eq_expr->set_lhs($expr);
|
---|
| 282 | $eq_expr->set_op($self->{_curr_match});
|
---|
| 283 |
|
---|
| 284 | my $rhs = RelationalExpr($self, $tokens);
|
---|
| 285 |
|
---|
| 286 | $eq_expr->set_rhs($rhs);
|
---|
| 287 | $expr = $eq_expr;
|
---|
| 288 | }
|
---|
| 289 |
|
---|
| 290 | return $expr;
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | sub RelationalExpr {
|
---|
| 294 | my ($self, $tokens) = @_;
|
---|
| 295 |
|
---|
| 296 | debug("in SUB\n");
|
---|
| 297 |
|
---|
| 298 | my $expr = AdditiveExpr($self, $tokens);
|
---|
| 299 | while (match($self, $tokens, '(<|>|<=|>=)')) {
|
---|
| 300 | my $rel_expr = XML::XPath::Expr->new($self);
|
---|
| 301 | $rel_expr->set_lhs($expr);
|
---|
| 302 | $rel_expr->set_op($self->{_curr_match});
|
---|
| 303 |
|
---|
| 304 | my $rhs = AdditiveExpr($self, $tokens);
|
---|
| 305 |
|
---|
| 306 | $rel_expr->set_rhs($rhs);
|
---|
| 307 | $expr = $rel_expr;
|
---|
| 308 | }
|
---|
| 309 |
|
---|
| 310 | return $expr;
|
---|
| 311 | }
|
---|
| 312 |
|
---|
| 313 | sub AdditiveExpr {
|
---|
| 314 | my ($self, $tokens) = @_;
|
---|
| 315 |
|
---|
| 316 | debug("in SUB\n");
|
---|
| 317 |
|
---|
| 318 | my $expr = MultiplicativeExpr($self, $tokens);
|
---|
| 319 | while (match($self, $tokens, '[\\+\\-]')) {
|
---|
| 320 | my $add_expr = XML::XPath::Expr->new($self);
|
---|
| 321 | $add_expr->set_lhs($expr);
|
---|
| 322 | $add_expr->set_op($self->{_curr_match});
|
---|
| 323 |
|
---|
| 324 | my $rhs = MultiplicativeExpr($self, $tokens);
|
---|
| 325 |
|
---|
| 326 | $add_expr->set_rhs($rhs);
|
---|
| 327 | $expr = $add_expr;
|
---|
| 328 | }
|
---|
| 329 |
|
---|
| 330 | return $expr;
|
---|
| 331 | }
|
---|
| 332 |
|
---|
| 333 | sub MultiplicativeExpr {
|
---|
| 334 | my ($self, $tokens) = @_;
|
---|
| 335 |
|
---|
| 336 | debug("in SUB\n");
|
---|
| 337 |
|
---|
| 338 | my $expr = UnaryExpr($self, $tokens);
|
---|
| 339 | while (match($self, $tokens, '(\\*|div|mod)')) {
|
---|
| 340 | my $mult_expr = XML::XPath::Expr->new($self);
|
---|
| 341 | $mult_expr->set_lhs($expr);
|
---|
| 342 | $mult_expr->set_op($self->{_curr_match});
|
---|
| 343 |
|
---|
| 344 | my $rhs = UnaryExpr($self, $tokens);
|
---|
| 345 |
|
---|
| 346 | $mult_expr->set_rhs($rhs);
|
---|
| 347 | $expr = $mult_expr;
|
---|
| 348 | }
|
---|
| 349 |
|
---|
| 350 | return $expr;
|
---|
| 351 | }
|
---|
| 352 |
|
---|
| 353 | sub UnaryExpr {
|
---|
| 354 | my ($self, $tokens) = @_;
|
---|
| 355 |
|
---|
| 356 | debug("in SUB\n");
|
---|
| 357 |
|
---|
| 358 | if (match($self, $tokens, '-')) {
|
---|
| 359 | my $expr = XML::XPath::Expr->new($self);
|
---|
| 360 | $expr->set_lhs(XML::XPath::Number->new(0));
|
---|
| 361 | $expr->set_op('-');
|
---|
| 362 | $expr->set_rhs(UnaryExpr($self, $tokens));
|
---|
| 363 | return $expr;
|
---|
| 364 | }
|
---|
| 365 | else {
|
---|
| 366 | return UnionExpr($self, $tokens);
|
---|
| 367 | }
|
---|
| 368 | }
|
---|
| 369 |
|
---|
| 370 | sub UnionExpr {
|
---|
| 371 | my ($self, $tokens) = @_;
|
---|
| 372 |
|
---|
| 373 | debug("in SUB\n");
|
---|
| 374 |
|
---|
| 375 | my $expr = PathExpr($self, $tokens);
|
---|
| 376 | while (match($self, $tokens, '\\|')) {
|
---|
| 377 | my $un_expr = XML::XPath::Expr->new($self);
|
---|
| 378 | $un_expr->set_lhs($expr);
|
---|
| 379 | $un_expr->set_op('|');
|
---|
| 380 |
|
---|
| 381 | my $rhs = PathExpr($self, $tokens);
|
---|
| 382 |
|
---|
| 383 | $un_expr->set_rhs($rhs);
|
---|
| 384 | $expr = $un_expr;
|
---|
| 385 | }
|
---|
| 386 |
|
---|
| 387 | return $expr;
|
---|
| 388 | }
|
---|
| 389 |
|
---|
| 390 | sub PathExpr {
|
---|
| 391 | my ($self, $tokens) = @_;
|
---|
| 392 |
|
---|
| 393 | debug("in SUB\n");
|
---|
| 394 |
|
---|
| 395 | # PathExpr is LocationPath | FilterExpr | FilterExpr '//?' RelativeLocationPath
|
---|
| 396 |
|
---|
| 397 | # Since we are being predictive we need to find out which function to call next, then.
|
---|
| 398 |
|
---|
| 399 | # LocationPath either starts with "/", "//", ".", ".." or a proper Step.
|
---|
| 400 |
|
---|
| 401 | my $expr = XML::XPath::Expr->new($self);
|
---|
| 402 |
|
---|
| 403 | my $test = $tokens->[$self->{_tokpos}];
|
---|
| 404 |
|
---|
| 405 | # Test for AbsoluteLocationPath and AbbreviatedRelativeLocationPath
|
---|
| 406 | if ($test =~ /^(\/\/?|\.\.?)$/) {
|
---|
| 407 | # LocationPath
|
---|
| 408 | $expr->set_lhs(LocationPath($self, $tokens));
|
---|
| 409 | }
|
---|
| 410 | # Test for AxisName::...
|
---|
| 411 | elsif (is_step($self, $tokens)) {
|
---|
| 412 | $expr->set_lhs(LocationPath($self, $tokens));
|
---|
| 413 | }
|
---|
| 414 | else {
|
---|
| 415 | # Not a LocationPath
|
---|
| 416 | # Use FilterExpr instead:
|
---|
| 417 |
|
---|
| 418 | $expr = FilterExpr($self, $tokens);
|
---|
| 419 | if (match($self, $tokens, '//?')) {
|
---|
| 420 | my $loc_path = XML::XPath::LocationPath->new();
|
---|
| 421 | push @$loc_path, $expr;
|
---|
| 422 | if ($self->{_curr_match} eq '//') {
|
---|
| 423 | push @$loc_path, XML::XPath::Step->new($self, 'descendant-or-self',
|
---|
| 424 | XML::XPath::Step::test_nt_node);
|
---|
| 425 | }
|
---|
| 426 | push @$loc_path, RelativeLocationPath($self, $tokens);
|
---|
| 427 | my $new_expr = XML::XPath::Expr->new($self);
|
---|
| 428 | $new_expr->set_lhs($loc_path);
|
---|
| 429 | return $new_expr;
|
---|
| 430 | }
|
---|
| 431 | }
|
---|
| 432 |
|
---|
| 433 | return $expr;
|
---|
| 434 | }
|
---|
| 435 |
|
---|
| 436 | sub FilterExpr {
|
---|
| 437 | my ($self, $tokens) = @_;
|
---|
| 438 |
|
---|
| 439 | debug("in SUB\n");
|
---|
| 440 |
|
---|
| 441 | my $expr = PrimaryExpr($self, $tokens);
|
---|
| 442 | while (match($self, $tokens, '\\[')) {
|
---|
| 443 | # really PredicateExpr...
|
---|
| 444 | $expr->push_predicate(Expr($self, $tokens));
|
---|
| 445 | match($self, $tokens, '\\]', 1);
|
---|
| 446 | }
|
---|
| 447 |
|
---|
| 448 | return $expr;
|
---|
| 449 | }
|
---|
| 450 |
|
---|
| 451 | sub PrimaryExpr {
|
---|
| 452 | my ($self, $tokens) = @_;
|
---|
| 453 |
|
---|
| 454 | debug("in SUB\n");
|
---|
| 455 |
|
---|
| 456 | my $expr = XML::XPath::Expr->new($self);
|
---|
| 457 |
|
---|
| 458 | if (match($self, $tokens, $LITERAL)) {
|
---|
| 459 | # new Literal with $self->{_curr_match}...
|
---|
| 460 | $self->{_curr_match} =~ m/^(["'])(.*)\1$/;
|
---|
| 461 | $expr->set_lhs(XML::XPath::Literal->new($2));
|
---|
| 462 | }
|
---|
| 463 | elsif (match($self, $tokens, $NUMBER_RE)) {
|
---|
| 464 | # new Number with $self->{_curr_match}...
|
---|
| 465 | $expr->set_lhs(XML::XPath::Number->new($self->{_curr_match}));
|
---|
| 466 | }
|
---|
| 467 | elsif (match($self, $tokens, '\\(')) {
|
---|
| 468 | $expr->set_lhs(Expr($self, $tokens));
|
---|
| 469 | match($self, $tokens, '\\)', 1);
|
---|
| 470 | }
|
---|
| 471 | elsif (match($self, $tokens, "\\\$$QName")) {
|
---|
| 472 | # new Variable with $self->{_curr_match}...
|
---|
| 473 | $self->{_curr_match} =~ /^\$(.*)$/;
|
---|
| 474 | $expr->set_lhs(XML::XPath::Variable->new($self, $1));
|
---|
| 475 | }
|
---|
| 476 | elsif (match($self, $tokens, $QName)) {
|
---|
| 477 | # check match not Node_Type - done in lexer...
|
---|
| 478 | # new Function
|
---|
| 479 | my $func_name = $self->{_curr_match};
|
---|
| 480 | match($self, $tokens, '\\(', 1);
|
---|
| 481 | $expr->set_lhs(
|
---|
| 482 | XML::XPath::Function->new(
|
---|
| 483 | $self,
|
---|
| 484 | $func_name,
|
---|
| 485 | Arguments($self, $tokens)
|
---|
| 486 | )
|
---|
| 487 | );
|
---|
| 488 | match($self, $tokens, '\\)', 1);
|
---|
| 489 | }
|
---|
| 490 | else {
|
---|
| 491 | die "Not a PrimaryExpr at ", $tokens->[$self->{_tokpos}], "\n";
|
---|
| 492 | }
|
---|
| 493 |
|
---|
| 494 | return $expr;
|
---|
| 495 | }
|
---|
| 496 |
|
---|
| 497 | sub Arguments {
|
---|
| 498 | my ($self, $tokens) = @_;
|
---|
| 499 |
|
---|
| 500 | debug("in SUB\n");
|
---|
| 501 |
|
---|
| 502 | my @args;
|
---|
| 503 |
|
---|
| 504 | if($tokens->[$self->{_tokpos}] eq ')') {
|
---|
| 505 | return \@args;
|
---|
| 506 | }
|
---|
| 507 |
|
---|
| 508 | push @args, Expr($self, $tokens);
|
---|
| 509 | while (match($self, $tokens, ',')) {
|
---|
| 510 | push @args, Expr($self, $tokens);
|
---|
| 511 | }
|
---|
| 512 |
|
---|
| 513 | return \@args;
|
---|
| 514 | }
|
---|
| 515 |
|
---|
| 516 | sub LocationPath {
|
---|
| 517 | my ($self, $tokens) = @_;
|
---|
| 518 |
|
---|
| 519 | debug("in SUB\n");
|
---|
| 520 |
|
---|
| 521 | my $loc_path = XML::XPath::LocationPath->new();
|
---|
| 522 |
|
---|
| 523 | if (match($self, $tokens, '/')) {
|
---|
| 524 | # root
|
---|
| 525 | debug("SUB: Matched root\n");
|
---|
| 526 | push @$loc_path, XML::XPath::Root->new();
|
---|
| 527 | if (is_step($self, $tokens)) {
|
---|
| 528 | debug("Next is step\n");
|
---|
| 529 | push @$loc_path, RelativeLocationPath($self, $tokens);
|
---|
| 530 | }
|
---|
| 531 | }
|
---|
| 532 | elsif (match($self, $tokens, '//')) {
|
---|
| 533 | # root
|
---|
| 534 | push @$loc_path, XML::XPath::Root->new();
|
---|
| 535 | my $optimised = optimise_descendant_or_self($self, $tokens);
|
---|
| 536 | if (!$optimised) {
|
---|
| 537 | push @$loc_path, XML::XPath::Step->new($self, 'descendant-or-self',
|
---|
| 538 | XML::XPath::Step::test_nt_node);
|
---|
| 539 | push @$loc_path, RelativeLocationPath($self, $tokens);
|
---|
| 540 | }
|
---|
| 541 | else {
|
---|
| 542 | push @$loc_path, $optimised, RelativeLocationPath($self, $tokens);
|
---|
| 543 | }
|
---|
| 544 | }
|
---|
| 545 | else {
|
---|
| 546 | push @$loc_path, RelativeLocationPath($self, $tokens);
|
---|
| 547 | }
|
---|
| 548 |
|
---|
| 549 | return $loc_path;
|
---|
| 550 | }
|
---|
| 551 |
|
---|
| 552 | sub optimise_descendant_or_self {
|
---|
| 553 | my ($self, $tokens) = @_;
|
---|
| 554 |
|
---|
| 555 | debug("in SUB\n");
|
---|
| 556 |
|
---|
| 557 | my $tokpos = $self->{_tokpos};
|
---|
| 558 |
|
---|
| 559 | # // must be followed by a Step.
|
---|
| 560 | if ($tokens->[$tokpos+1] && $tokens->[$tokpos+1] eq '[') {
|
---|
| 561 | # next token is a predicate
|
---|
| 562 | return;
|
---|
| 563 | }
|
---|
| 564 | elsif ($tokens->[$tokpos] =~ /^\.\.?$/) {
|
---|
| 565 | # abbreviatedStep - can't optimise.
|
---|
| 566 | return;
|
---|
| 567 | }
|
---|
| 568 | else {
|
---|
| 569 | debug("Trying to optimise //\n");
|
---|
| 570 | my $step = Step($self, $tokens);
|
---|
| 571 | if ($step->{axis} ne 'child') {
|
---|
| 572 | # can't optimise axes other than child for now...
|
---|
| 573 | $self->{_tokpos} = $tokpos;
|
---|
| 574 | return;
|
---|
| 575 | }
|
---|
| 576 | $step->{axis} = 'descendant';
|
---|
| 577 | $step->{axis_method} = 'axis_descendant';
|
---|
| 578 | $self->{_tokpos}--;
|
---|
| 579 | $tokens->[$self->{_tokpos}] = '.';
|
---|
| 580 | return $step;
|
---|
| 581 | }
|
---|
| 582 | }
|
---|
| 583 |
|
---|
| 584 | sub RelativeLocationPath {
|
---|
| 585 | my ($self, $tokens) = @_;
|
---|
| 586 |
|
---|
| 587 | debug("in SUB\n");
|
---|
| 588 |
|
---|
| 589 | my @steps;
|
---|
| 590 |
|
---|
| 591 | push @steps, Step($self, $tokens);
|
---|
| 592 | while (match($self, $tokens, '//?')) {
|
---|
| 593 | if ($self->{_curr_match} eq '//') {
|
---|
| 594 | my $optimised = optimise_descendant_or_self($self, $tokens);
|
---|
| 595 | if (!$optimised) {
|
---|
| 596 | push @steps, XML::XPath::Step->new($self, 'descendant-or-self',
|
---|
| 597 | XML::XPath::Step::test_nt_node);
|
---|
| 598 | }
|
---|
| 599 | else {
|
---|
| 600 | push @steps, $optimised;
|
---|
| 601 | }
|
---|
| 602 | }
|
---|
| 603 | push @steps, Step($self, $tokens);
|
---|
| 604 | if (@steps > 1 &&
|
---|
| 605 | $steps[-1]->{axis} eq 'self' &&
|
---|
| 606 | $steps[-1]->{test} == XML::XPath::Step::test_nt_node) {
|
---|
| 607 | pop @steps;
|
---|
| 608 | }
|
---|
| 609 | }
|
---|
| 610 |
|
---|
| 611 | return @steps;
|
---|
| 612 | }
|
---|
| 613 |
|
---|
| 614 | sub Step {
|
---|
| 615 | my ($self, $tokens) = @_;
|
---|
| 616 |
|
---|
| 617 | debug("in SUB\n");
|
---|
| 618 |
|
---|
| 619 | if (match($self, $tokens, '\\.')) {
|
---|
| 620 | # self::node()
|
---|
| 621 | return XML::XPath::Step->new($self, 'self', XML::XPath::Step::test_nt_node);
|
---|
| 622 | }
|
---|
| 623 | elsif (match($self, $tokens, '\\.\\.')) {
|
---|
| 624 | # parent::node()
|
---|
| 625 | return XML::XPath::Step->new($self, 'parent', XML::XPath::Step::test_nt_node);
|
---|
| 626 | }
|
---|
| 627 | else {
|
---|
| 628 | # AxisSpecifier NodeTest Predicate(s?)
|
---|
| 629 | my $token = $tokens->[$self->{_tokpos}];
|
---|
| 630 |
|
---|
| 631 | debug("SUB: Checking $token\n");
|
---|
| 632 |
|
---|
| 633 | my $step;
|
---|
| 634 | if ($token eq 'processing-instruction') {
|
---|
| 635 | $self->{_tokpos}++;
|
---|
| 636 | match($self, $tokens, '\\(', 1);
|
---|
| 637 | match($self, $tokens, $LITERAL);
|
---|
| 638 | $self->{_curr_match} =~ /^["'](.*)["']$/;
|
---|
| 639 | $step = XML::XPath::Step->new($self, 'child',
|
---|
| 640 | XML::XPath::Step::test_nt_pi,
|
---|
| 641 | XML::XPath::Literal->new($1));
|
---|
| 642 | match($self, $tokens, '\\)', 1);
|
---|
| 643 | }
|
---|
| 644 | elsif ($token =~ /^\@($NCWild|$QName|$QNWild)$/o) {
|
---|
| 645 | $self->{_tokpos}++;
|
---|
| 646 | if ($token eq '@*') {
|
---|
| 647 | $step = XML::XPath::Step->new($self,
|
---|
| 648 | 'attribute',
|
---|
| 649 | XML::XPath::Step::test_attr_any,
|
---|
| 650 | '*');
|
---|
| 651 | }
|
---|
| 652 | elsif ($token =~ /^\@($NCName):\*$/o) {
|
---|
| 653 | $step = XML::XPath::Step->new($self,
|
---|
| 654 | 'attribute',
|
---|
| 655 | XML::XPath::Step::test_attr_ncwild,
|
---|
| 656 | $1);
|
---|
| 657 | }
|
---|
| 658 | elsif ($token =~ /^\@($QName)$/o) {
|
---|
| 659 | $step = XML::XPath::Step->new($self,
|
---|
| 660 | 'attribute',
|
---|
| 661 | XML::XPath::Step::test_attr_qname,
|
---|
| 662 | $1);
|
---|
| 663 | }
|
---|
| 664 | }
|
---|
| 665 | elsif ($token =~ /^($NCName):\*$/o) { # ns:*
|
---|
| 666 | $self->{_tokpos}++;
|
---|
| 667 | $step = XML::XPath::Step->new($self, 'child',
|
---|
| 668 | XML::XPath::Step::test_ncwild,
|
---|
| 669 | $1);
|
---|
| 670 | }
|
---|
| 671 | elsif ($token =~ /^$QNWild$/o) { # *
|
---|
| 672 | $self->{_tokpos}++;
|
---|
| 673 | $step = XML::XPath::Step->new($self, 'child',
|
---|
| 674 | XML::XPath::Step::test_any,
|
---|
| 675 | $token);
|
---|
| 676 | }
|
---|
| 677 | elsif ($token =~ /^$QName$/o) { # name:name
|
---|
| 678 | $self->{_tokpos}++;
|
---|
| 679 | $step = XML::XPath::Step->new($self, 'child',
|
---|
| 680 | XML::XPath::Step::test_qname,
|
---|
| 681 | $token);
|
---|
| 682 | }
|
---|
| 683 | elsif ($token eq 'comment()') {
|
---|
| 684 | $self->{_tokpos}++;
|
---|
| 685 | $step = XML::XPath::Step->new($self, 'child',
|
---|
| 686 | XML::XPath::Step::test_nt_comment);
|
---|
| 687 | }
|
---|
| 688 | elsif ($token eq 'text()') {
|
---|
| 689 | $self->{_tokpos}++;
|
---|
| 690 | $step = XML::XPath::Step->new($self, 'child',
|
---|
| 691 | XML::XPath::Step::test_nt_text);
|
---|
| 692 | }
|
---|
| 693 | elsif ($token eq 'node()') {
|
---|
| 694 | $self->{_tokpos}++;
|
---|
| 695 | $step = XML::XPath::Step->new($self, 'child',
|
---|
| 696 | XML::XPath::Step::test_nt_node);
|
---|
| 697 | }
|
---|
| 698 | elsif ($token eq 'processing-instruction()') {
|
---|
| 699 | $self->{_tokpos}++;
|
---|
| 700 | $step = XML::XPath::Step->new($self, 'child',
|
---|
| 701 | XML::XPath::Step::test_nt_pi);
|
---|
| 702 | }
|
---|
| 703 | elsif ($token =~ /^$AXIS_NAME($NCWild|$QName|$QNWild|$NODE_TYPE)$/o) {
|
---|
| 704 | my $axis = $1;
|
---|
| 705 | $self->{_tokpos}++;
|
---|
| 706 | $token = $2;
|
---|
| 707 | if ($token eq 'processing-instruction') {
|
---|
| 708 | match($self, $tokens, '\\(', 1);
|
---|
| 709 | match($self, $tokens, $LITERAL);
|
---|
| 710 | $self->{_curr_match} =~ /^["'](.*)["']$/;
|
---|
| 711 | $step = XML::XPath::Step->new($self, $axis,
|
---|
| 712 | XML::XPath::Step::test_nt_pi,
|
---|
| 713 | XML::XPath::Literal->new($1));
|
---|
| 714 | match($self, $tokens, '\\)', 1);
|
---|
| 715 | }
|
---|
| 716 | elsif ($token =~ /^($NCName):\*$/o) { # ns:*
|
---|
| 717 | $step = XML::XPath::Step->new($self, $axis,
|
---|
| 718 | (($axis eq 'attribute') ?
|
---|
| 719 | XML::XPath::Step::test_attr_ncwild
|
---|
| 720 | :
|
---|
| 721 | XML::XPath::Step::test_ncwild),
|
---|
| 722 | $1);
|
---|
| 723 | }
|
---|
| 724 | elsif ($token =~ /^$QNWild$/o) { # *
|
---|
| 725 | $step = XML::XPath::Step->new($self, $axis,
|
---|
| 726 | (($axis eq 'attribute') ?
|
---|
| 727 | XML::XPath::Step::test_attr_any
|
---|
| 728 | :
|
---|
| 729 | XML::XPath::Step::test_any),
|
---|
| 730 | $token);
|
---|
| 731 | }
|
---|
| 732 | elsif ($token =~ /^$QName$/o) { # name:name
|
---|
| 733 | $step = XML::XPath::Step->new($self, $axis,
|
---|
| 734 | (($axis eq 'attribute') ?
|
---|
| 735 | XML::XPath::Step::test_attr_qname
|
---|
| 736 | :
|
---|
| 737 | XML::XPath::Step::test_qname),
|
---|
| 738 | $token);
|
---|
| 739 | }
|
---|
| 740 | elsif ($token eq 'comment()') {
|
---|
| 741 | $step = XML::XPath::Step->new($self, $axis,
|
---|
| 742 | XML::XPath::Step::test_nt_comment);
|
---|
| 743 | }
|
---|
| 744 | elsif ($token eq 'text()') {
|
---|
| 745 | $step = XML::XPath::Step->new($self, $axis,
|
---|
| 746 | XML::XPath::Step::test_nt_text);
|
---|
| 747 | }
|
---|
| 748 | elsif ($token eq 'node()') {
|
---|
| 749 | $step = XML::XPath::Step->new($self, $axis,
|
---|
| 750 | XML::XPath::Step::test_nt_node);
|
---|
| 751 | }
|
---|
| 752 | elsif ($token eq 'processing-instruction()') {
|
---|
| 753 | $step = XML::XPath::Step->new($self, $axis,
|
---|
| 754 | XML::XPath::Step::test_nt_pi);
|
---|
| 755 | }
|
---|
| 756 | else {
|
---|
| 757 | die "Shouldn't get here";
|
---|
| 758 | }
|
---|
| 759 | }
|
---|
| 760 | else {
|
---|
| 761 | die "token $token doesn't match format of a 'Step'\n";
|
---|
| 762 | }
|
---|
| 763 |
|
---|
| 764 | while (match($self, $tokens, '\\[')) {
|
---|
| 765 | push @{$step->{predicates}}, Expr($self, $tokens);
|
---|
| 766 | match($self, $tokens, '\\]', 1);
|
---|
| 767 | }
|
---|
| 768 |
|
---|
| 769 | return $step;
|
---|
| 770 | }
|
---|
| 771 | }
|
---|
| 772 |
|
---|
| 773 | sub is_step {
|
---|
| 774 | my ($self, $tokens) = @_;
|
---|
| 775 |
|
---|
| 776 | my $token = $tokens->[$self->{_tokpos}];
|
---|
| 777 |
|
---|
| 778 | return unless defined $token;
|
---|
| 779 |
|
---|
| 780 | debug("SUB: Checking if '$token' is a step\n");
|
---|
| 781 |
|
---|
| 782 | local $^W;
|
---|
| 783 |
|
---|
| 784 | if ($token eq 'processing-instruction') {
|
---|
| 785 | return 1;
|
---|
| 786 | }
|
---|
| 787 | elsif ($token =~ /^\@($NCWild|$QName|$QNWild)$/o) {
|
---|
| 788 | return 1;
|
---|
| 789 | }
|
---|
| 790 | elsif ($token =~ /^($NCWild|$QName|$QNWild)$/o && $tokens->[$self->{_tokpos}+1] ne '(') {
|
---|
| 791 | return 1;
|
---|
| 792 | }
|
---|
| 793 | elsif ($token =~ /^$NODE_TYPE$/o) {
|
---|
| 794 | return 1;
|
---|
| 795 | }
|
---|
| 796 | elsif ($token =~ /^$AXIS_NAME($NCWild|$QName|$QNWild|$NODE_TYPE)$/o) {
|
---|
| 797 | return 1;
|
---|
| 798 | }
|
---|
| 799 |
|
---|
| 800 | debug("SUB: '$token' not a step\n");
|
---|
| 801 |
|
---|
| 802 | return;
|
---|
| 803 | }
|
---|
| 804 |
|
---|
| 805 | sub debug {
|
---|
| 806 | return unless $XML::XPath::Debug;
|
---|
| 807 |
|
---|
| 808 | my ($pkg, $file, $line, $sub) = caller(1);
|
---|
| 809 |
|
---|
| 810 | $sub =~ s/^$pkg\:://;
|
---|
| 811 |
|
---|
| 812 | while (@_) {
|
---|
| 813 | my $x = shift;
|
---|
| 814 | $x =~ s/\bPKG\b/$pkg/g;
|
---|
| 815 | $x =~ s/\bLINE\b/$line/g;
|
---|
| 816 | $x =~ s/\bSUB\b/$sub/g;
|
---|
| 817 | print STDERR $x;
|
---|
| 818 | }
|
---|
| 819 | }
|
---|
| 820 |
|
---|
| 821 | 1;
|
---|