[34138] | 1 | package XML::Parser::Expat;
|
---|
| 2 |
|
---|
| 3 | require 5.004;
|
---|
| 4 |
|
---|
| 5 | use strict;
|
---|
| 6 | use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path
|
---|
| 7 | $have_File_Spec);
|
---|
| 8 | use Carp;
|
---|
| 9 |
|
---|
| 10 | require DynaLoader;
|
---|
| 11 |
|
---|
@ISA     = qw(DynaLoader);
$VERSION = "2.41";

# File::Spec is optional: use it if it is already loaded or can be loaded
# now, otherwise fall back to plain '/'-joined paths below.
$have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm';

%Encoding_Table = ();

# Build the encoding-map search path: every existing XML/Parser/Encodings
# directory found under an @INC entry, followed by the current directory.
{
    my @candidates;
    my $here;

    if ($have_File_Spec) {
        @candidates = map(File::Spec->catdir($_, qw(XML Parser Encodings)),
                          @INC);
        $here = File::Spec->curdir;
    }
    else {
        @candidates = map($_ . '/XML/Parser/Encodings', @INC);
        $here = '.';
    }

    @Encoding_Path = (grep(-d $_, @candidates), $here);
}

# Load the compiled part of the module; the Set*Handler functions referenced
# below are not defined in this file (presumably they come from that code).
bootstrap XML::Parser::Expat $VERSION;

# Maps the handler type names accepted by setHandlers to the function that
# installs a handler of that type.
%Handler_Setters = (
    Start        => \&SetStartElementHandler,
    End          => \&SetEndElementHandler,
    Char         => \&SetCharacterDataHandler,
    Proc         => \&SetProcessingInstructionHandler,
    Comment      => \&SetCommentHandler,
    CdataStart   => \&SetStartCdataHandler,
    CdataEnd     => \&SetEndCdataHandler,
    Default      => \&SetDefaultHandler,
    Unparsed     => \&SetUnparsedEntityDeclHandler,
    Notation     => \&SetNotationDeclHandler,
    ExternEnt    => \&SetExternalEntityRefHandler,
    ExternEntFin => \&SetExtEntFinishHandler,
    Entity       => \&SetEntityDeclHandler,
    Element      => \&SetElementDeclHandler,
    Attlist      => \&SetAttListDeclHandler,
    Doctype      => \&SetDoctypeHandler,
    DoctypeFin   => \&SetEndDoctypeHandler,
    XMLDecl      => \&SetXMLDeclHandler
);
|
---|
| 51 |
|
---|
# Constructor.  Called as CLASS->new(KEY => VALUE, ...); the option pairs
# become the object's own hash.  Internal bookkeeping fields are then
# (re)initialised and the underlying parser is created with ParserCreate
# (not defined in this file; presumably part of the bootstrapped code).
# Returns the new object.
sub new {
    my ($class, %opt) = @_;
    my $self = bless \%opt, $class;

    $self->{_State_} = 0;
    $self->{Context} = [];
    $self->{Namespaces}   ||= 0;
    $self->{ErrorMessage} ||= '';

    # The namespace bookkeeping tables only exist when namespace
    # processing was requested.
    if ($self->{Namespaces}) {
        $self->{Namespace_Table} = {};
        $self->{Namespace_List}  = [undef];
        $self->{Prefix_Table}    = {};
        $self->{New_Prefixes}    = [];
    }

    $self->{_Setters} = \%Handler_Setters;
    $self->{Parser}   = ParserCreate($self, $self->{ProtocolEncoding},
                                     $self->{Namespaces});
    return $self;
}
|
---|
| 70 |
|
---|
# Load an encoding map file and hand its contents to LoadEncoding (not
# defined in this file; presumably part of the bootstrapped code).
#
# $file is an encoding name or a path.  Its final path component is
# lower-cased and ".enc" is appended when missing.  Unless the result starts
# with "/", each directory in @Encoding_Path is tried in order and the first
# existing match is used.
#
# Returns whatever name LoadEncoding reports.  Croaks when the file cannot be
# opened or read, or when LoadEncoding returns undef.
sub load_encoding {
    my ($file) = @_;

    $file =~ s!([^/]+)$!\L$1\E!;
    $file .= '.enc' unless $file =~ /\.enc$/;
    unless ($file =~ m!^/!) {
        foreach my $dir (@Encoding_Path) {
            my $candidate = ($have_File_Spec
                             ? File::Spec->catfile($dir, $file)
                             : "$dir/$file");
            if (-e $candidate) {
                $file = $candidate;
                last;
            }
        }
    }

    # Three-argument open with an explicit read mode: the name can come from
    # outside the program, and the two-argument form would interpret leading
    # or trailing mode characters ('>', '|', ...) in it.  Note that the
    # lexical handle and three-argument open need Perl 5.6 or later.
    open(my $enc, '<', $file)
        or croak("Couldn't open encmap $file:\n$!\n");
    binmode($enc);

    my $data;
    my $br = sysread($enc, $data, -s $enc);
    croak("Trouble reading $file:\n$!\n")
        unless defined($br);
    close($enc);

    my $name = LoadEncoding($data, $br);
    croak("$file isn't an encmap file")
        unless defined($name);

    $name;
}    # End load_encoding
|
---|
| 103 |
|
---|
# Install handlers.  Arguments after the object are TYPE => HANDLER pairs.
# Each TYPE must be a key of the object's _Setters table; each HANDLER must
# be a code reference or a false value.  For every pair the setter found in
# _Setters is called with the parser and the handler.
#
# Returns a flat list of (TYPE, value returned by the setter) pairs in
# argument order.  Croaks on an odd argument count, a true non-CODE handler,
# or an unknown type.
sub setHandlers {
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
        if @handler_pairs % 2;

    my @previous;

    while (@handler_pairs) {
        my ($type, $handler) = splice(@handler_pairs, 0, 2);

        # Anything true must be a plain code reference.
        croak "Handler for $type not a Code ref"
            if ($handler and ref($handler) ne 'CODE');

        my $setter = $self->{_Setters}->{$type};

        if (!defined($setter)) {
            my @types = sort keys %{$self->{_Setters}};
            croak("Unknown Expat handler type: $type\n Valid types: @types");
        }

        my $old = $setter->($self->{Parser}, $handler);
        push(@previous, $type, $old);
    }

    return @previous;
}
|
---|
| 131 |
|
---|
# Croak with $message extended by the current line number (as reported by
# GetCurrentLineNumber for this object's parser).  When the ErrorContext
# option is defined, the output of position_in_context is appended as well.
sub xpcroak {
    my ($self, $message) = @_;

    my $context_lines = $self->{ErrorContext};
    my $line = GetCurrentLineNumber($self->{Parser});
    $message .= " at line $line";

    if (defined($context_lines)) {
        $message .= ":\n" . $self->position_in_context($context_lines);
    }

    croak $message;
}
|
---|
| 143 |
|
---|
# Warn (via carp) with $message extended by the current line number (as
# reported by GetCurrentLineNumber for this object's parser).  When the
# ErrorContext option is defined, the output of position_in_context is
# appended as well.
sub xpcarp {
    my ($self, $message) = @_;

    my $context_lines = $self->{ErrorContext};
    my $line = GetCurrentLineNumber($self->{Parser});
    $message .= " at line $line";

    if (defined($context_lines)) {
        $message .= ":\n" . $self->position_in_context($context_lines);
    }

    carp $message;
}
|
---|
| 154 |
|
---|
# Forward to DefaultCurrent for this object's parser, but only while
# _State_ is 1 (the value parse() sets for the duration of a parse).
# In any other state nothing is called and no explicit value is returned.
sub default_current {
    my ($self) = @_;
    return DefaultCurrent($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
| 161 |
|
---|
# Return the result of RecognizedString for this object's parser, but only
# while _State_ is 1 (the value parse() sets for the duration of a parse).
# In any other state nothing is called and no explicit value is returned.
sub recognized_string {
    my ($self) = @_;
    return RecognizedString($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
| 168 |
|
---|
# Return the result of OriginalString for this object's parser, but only
# while _State_ is 1 (the value parse() sets for the duration of a parse).
# In any other state nothing is called and no explicit value is returned.
sub original_string {
    my ($self) = @_;
    return OriginalString($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
| 175 |
|
---|
# Return the result of GetCurrentLineNumber for this object's parser, but
# only while _State_ is 1 (the value parse() sets for the duration of a
# parse).  In any other state nothing is called and no explicit value is
# returned.
sub current_line {
    my ($self) = @_;
    return GetCurrentLineNumber($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
| 182 |
|
---|
# Return the result of GetCurrentColumnNumber for this object's parser, but
# only while _State_ is 1 (the value parse() sets for the duration of a
# parse).  In any other state nothing is called and no explicit value is
# returned.
sub current_column {
    my ($self) = @_;
    return GetCurrentColumnNumber($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
| 189 |
|
---|
# Return the result of GetCurrentByteIndex for this object's parser, but
# only while _State_ is 1 (the value parse() sets for the duration of a
# parse).  In any other state nothing is called and no explicit value is
# returned.
sub current_byte {
    my ($self) = @_;
    return GetCurrentByteIndex($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
| 196 |
|
---|
# Get, and optionally set, the parser's base.  The current value is always
# fetched with GetBase and returned; SetBase is called only when a second
# argument was actually passed (even if that argument is undef).
sub base {
    my ($self, $newbase) = @_;
    my $set_requested = @_ > 1;

    my $parser   = $self->{Parser};
    my $previous = GetBase($parser);

    if ($set_requested) {
        SetBase($parser, $newbase);
    }

    return $previous;
}
|
---|
| 204 |
|
---|
# Return the contents of the object's Context array (in scalar context,
# its element count).
sub context {
    my ($self) = @_;
    return @{ $self->{Context} };
}
|
---|
| 209 |
|
---|
# Return the last entry of the Context array, or undef when it is empty.
sub current_element {
    my ($self) = @_;
    my $stack = $self->{Context};
    return undef unless @$stack;
    return $stack->[-1];
}
|
---|
| 214 |
|
---|
# Compare $element with the last entry of the Context array using the
# object's eq_name method and return that result; returns undef when the
# Context array is empty.
sub in_element {
    my ($self, $element) = @_;
    my $stack = $self->{Context};
    return undef unless @$stack;
    return $self->eq_name($stack->[-1], $element);
}
|
---|
| 220 |
|
---|
# Count how many entries of the Context array match $element according to
# the object's eq_name method.  Returns that count (0 when none match).
sub within_element {
    my ($self, $element) = @_;

    my $matches = 0;
    for my $open (@{ $self->{Context} }) {
        ++$matches if $self->eq_name($open, $element);
    }

    return $matches;
}
|
---|
| 229 |
|
---|
# Return the number of entries in the Context array.
sub depth {
    my ($self) = @_;
    return scalar @{ $self->{Context} };
}
|
---|
| 234 |
|
---|
# Return the result of ElementIndex for this object's parser, but only while
# _State_ is 1 (the value parse() sets for the duration of a parse).  In any
# other state nothing is called and no explicit value is returned.
sub element_index {
    my $self = shift;
    return ElementIndex($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
| 242 |
|
---|
| 243 | ################
|
---|
| 244 | # Namespace methods
|
---|
| 245 |
|
---|
# Look up the Namespace_List entry whose index is the integer value of
# $name.  Warnings are switched off for the conversion, since $name may not
# look like a number.
# NOTE(review): presumably names built by generate_ns_name carry their
# namespace index as their numeric value -- confirm against that code.
sub namespace {
    my ($self, $name) = @_;
    local $^W = 0;

    my $index = int($name);
    return $self->{Namespace_List}->[$index];
}
|
---|
| 251 |
|
---|
# Two names are equal when both their integer values and their string
# values agree.  Warnings are switched off because the names may not look
# like numbers.
sub eq_name {
    my ($self, $nm1, $nm2) = @_;
    local $^W = 0;

    return int($nm1) == int($nm2) && $nm1 eq $nm2;
}
|
---|
| 258 |
|
---|
# Build a namespace-qualified name.  With a false $namespace the plain
# $name is returned unchanged; otherwise the result of GenerateNSName
# (called with the name, the namespace and the object's Namespace_Table and
# Namespace_List) is returned.
sub generate_ns_name {
    my ($self, $name, $namespace) = @_;

    return $name unless $namespace;

    return GenerateNSName($name, $namespace,
                          $self->{Namespace_Table},
                          $self->{Namespace_List});
}
|
---|
| 267 |
|
---|
# Return the contents of the New_Prefixes array when the Namespaces option
# is true, and an empty list otherwise.
sub new_ns_prefixes {
    my ($self) = @_;
    return $self->{Namespaces} ? @{ $self->{New_Prefixes} } : ();
}
|
---|
| 275 |
|
---|
# Return the most recently pushed value on the Prefix_Table stack for
# $prefix.  Returns undef when the Namespaces option is false, or when the
# prefix has no stack or an empty one.
sub expand_ns_prefix {
    my ($self, $prefix) = @_;

    return undef unless $self->{Namespaces};

    my $uris = $self->{Prefix_Table}->{$prefix};
    if (defined($uris) && @$uris) {
        return $uris->[-1];
    }
    return undef;
}
|
---|
| 286 |
|
---|
# Return the keys of Prefix_Table when the Namespaces option is true, and an
# empty list otherwise.  The '#default' key is left out when the last entry
# on its stack is undefined.
sub current_ns_prefixes {
    my ($self) = @_;

    return () unless $self->{Namespaces};

    # Work on a shallow copy so the object's own table is not modified.
    my %active = %{ $self->{Prefix_Table} };

    delete $active{'#default'}
        if exists $active{'#default'}
        and not defined($active{'#default'}->[-1]);

    return keys %active;
}
|
---|
| 302 |
|
---|
| 303 |
|
---|
| 304 | ################################################################
|
---|
| 305 | # Namespace declaration handlers
|
---|
| 306 | #
|
---|
| 307 |
|
---|
# Record the start of a namespace declaration: push $uri onto the
# Prefix_Table stack for $prefix (creating the stack if needed) and append
# the prefix to New_Prefixes.  An undefined prefix is stored under the key
# '#default'.
sub NamespaceStart {
    my ($self, $prefix, $uri) = @_;

    $prefix = '#default' unless defined $prefix;

    my $uris = $self->{Prefix_Table}->{$prefix};
    if (!defined $uris) {
        $self->{Prefix_Table}->{$prefix} = [$uri];
    }
    else {
        push(@$uris, $uri);
    }

    # The New_Prefixes list gets emptied at end of startElement function
    # in Expat.xs

    push(@{ $self->{New_Prefixes} }, $prefix);
}
|
---|
| 326 |
|
---|
# Record the end of a namespace declaration: drop the newest entry from the
# Prefix_Table stack for $prefix, removing the whole key once its last
# entry would go.  An undefined prefix refers to the key '#default'.
sub NamespaceEnd {
    my ($self, $prefix) = @_;

    $prefix = '#default' unless defined $prefix;

    my $uris = $self->{Prefix_Table}->{$prefix};
    if (@$uris <= 1) {
        delete $self->{Prefix_Table}->{$prefix};
    }
    else {
        pop(@$uris);
    }
}
|
---|
| 340 |
|
---|
| 341 | ################
|
---|
| 342 |
|
---|
# Return the result of GetSpecifiedAttributeCount for this object's parser,
# but only while _State_ is 1 (the value parse() sets for the duration of a
# parse).  In any other state nothing is called and no explicit value is
# returned.
sub specified_attr {
    my ($self) = @_;
    return GetSpecifiedAttributeCount($self->{Parser})
        if $self->{_State_} == 1;
}
|
---|
| 350 |
|
---|
# While a parse is running (_State_ is 1), call UnsetAllHandlers on this
# object's parser.  Does nothing in any other state.
sub finish {
    my $self = shift;
    UnsetAllHandlers($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
| 358 |
|
---|
# Return a string showing the text around the current parse position with a
# "====^" pointer line marking the current column.  $lines is passed through
# to PositionContext, which supplies the text and the offset at which the
# pointer line is to be inserted.
#
# Only acts while _State_ is 1 (a parse is running); otherwise no explicit
# value is returned.  Returns '' when PositionContext yields no text.
sub position_in_context {
    my ($self, $lines) = @_;

    if ($self->{_State_} == 1) {
        my $parser = $self->{Parser};
        my ($text, $insert_at) = PositionContext($parser, $lines);

        return '' unless defined($text);

        my $column  = GetCurrentColumnNumber($parser);
        my $pointer = ('=' x ($column - 1)) . '^' . "\n";

        # Decide whether the pointer goes inside the text before a trailing
        # newline is (possibly) appended, since that changes the length.
        my $insert_inside = $insert_at < length($text);

        $text .= "\n" unless $text =~ /\n$/;

        return $insert_inside
            ? substr($text, 0, $insert_at) . $pointer . substr($text, $insert_at)
            : $text . $pointer;
    }
}
|
---|
| 384 |
|
---|
# Return a copy of TEXT with XML markup characters replaced by entity or
# character references.
#
#   $escaped = $obj->xml_escape(TEXT [, CHAR [, CHAR ...]]);
#
# "&" and "<" are always replaced (by "&amp;" and "&lt;").  Each additional
# CHAR argument names one more character to replace wherever it occurs:
# ">", '"' and "'" become "&gt;", "&quot;" and "&apos;"; any other character
# becomes a hexadecimal character reference such as "&#x2E;".
#
# The object itself is not used.  Croaks if a CHAR argument is longer than
# one character; undefined or empty CHAR arguments are ignored.
sub xml_escape {
    my $self = shift;
    my $text = shift;

    # The ampersand must go first so the entities produced below are not
    # escaped a second time.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;

    foreach my $char (@_) {
        # An empty pattern would make s/// reuse the last successful
        # pattern, so there is nothing sensible to do for an empty CHAR.
        next unless defined($char) and length($char);

        croak "xml_escape: '$char' isn't a single character"
            if length($char) > 1;

        # Already handled above; escaping them again would corrupt the
        # entities that were just generated.
        next if $char eq '&' or $char eq '<';

        if ($char eq '>') {
            $text =~ s/>/&gt;/g;
        }
        elsif ($char eq '"') {
            $text =~ s/"/&quot;/g;
        }
        elsif ($char eq "'") {
            $text =~ s/'/&apos;/g;
        }
        else {
            my $rep = '&#' . sprintf('x%X', ord($char)) . ';';
            # \Q...\E makes the character literal whatever it is.
            $text =~ s/\Q$char\E/$rep/g;
        }
    }

    $text;
}
|
---|
| 417 |
|
---|
# Pass the first argument on to SkipUntil for this object's parser, provided
# _State_ is at most 1 (i.e. the value has not yet been set to 2, which
# parse() does when it finishes).  Otherwise nothing is called.
sub skip_until {
    my $self = shift;
    SkipUntil($self->{Parser}, $_[0]) if $self->{_State_} <= 1;
}
|
---|
| 424 |
|
---|
# Call ParserRelease on this object's parser and return its result.
sub release {
    my ($self) = @_;
    return ParserRelease($self->{Parser});
}
|
---|
| 429 |
|
---|
# Destructor: call ParserFree on this object's parser.
sub DESTROY {
    my ($self) = @_;
    return ParserFree($self->{Parser});
}
|
---|
| 434 |
|
---|
# Parse a document.  The argument is either something from which an IO
# handle can be obtained (an IO::Handle object, a glob or glob reference,
# ...) or, failing that, the document text itself.
#
# Handle input is passed to ParseStream, string input to ParseString.  When
# the Stream_Delimiter option is defined, the handle class's input record
# separator is set to "\n<delimiter>\n" for the duration of ParseStream and
# restored afterwards.
#
# _State_ is set to 1 while parsing and to 2 afterwards.  Returns the
# (true) parse result; croaks with the object's ErrorMessage when the result
# is false, and croaks immediately when _State_ is already true on entry.
sub parse {
    my ($self, $arg) = @_;

    croak "Parse already in progress (Expat)" if $self->{_State_};
    $self->{_State_} = 1;

    my $parser = $self->{Parser};
    my $ioref;
    my $result = 0;

    if (defined $arg) {
        # Keep the eval below from disturbing the caller's $@.
        local *@;
        if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
            $ioref = $arg;
        }
        elsif ($] < 5.008 and defined tied($arg)) {
            require IO::Handle;
            $ioref = $arg;
        }
        else {
            require IO::Handle;
            # Try to treat the argument as a glob (or glob name/reference);
            # any failure simply leaves $ioref undefined.
            eval {
                no strict 'refs';
                $ioref = *{$arg}{IO} if defined *{$arg};
            };
        }
    }

    if (!defined($ioref)) {
        # No handle could be derived: the argument is the document itself.
        $result = ParseString($parser, $arg);
    }
    else {
        my $delim   = $self->{Stream_Delimiter};
        my $ioclass = ref $ioref;
        $ioclass = "IO::Handle" if !length $ioclass;

        my $saved_rs;
        if (defined($delim)) {
            $saved_rs = $ioclass->input_record_separator("\n$delim\n");
        }

        $result = ParseStream($parser, $ioref, $delim);

        if (defined($delim)) {
            $ioclass->input_record_separator($saved_rs);
        }
    }

    $self->{_State_} = 2;
    $result or croak $self->{ErrorMessage};
}
|
---|
| 481 |
|
---|
# Alias for parse: forwards all arguments to the object's parse method and
# returns its result.
sub parsestring {
    my ($self, @args) = @_;
    return $self->parse(@args);
}
|
---|
| 486 |
|
---|
# Open the named file in binary mode and parse it with the parse method.
#
#   $result = $obj->parsefile(FILENAME);
#
# Returns whatever parse returns.  Croaks if _State_ is already true (the
# parser has been used) or if the file cannot be opened.
sub parsefile {
    my $self = shift;
    croak "Parser has already been used" if $self->{_State_};

    # Localised glob: the handle is closed automatically if parse dies.
    local(*FILE);

    # Three-argument open with an explicit read mode, so that characters
    # such as '>', '<' or '|' in the file name are taken literally instead
    # of being interpreted as an open mode or a command to run.  (Requires
    # Perl 5.6 or later.)
    open(FILE, '<', $_[0]) or croak "Couldn't open $_[0]:\n$!";
    binmode(FILE);

    my $ret = $self->parse(*FILE);
    close(FILE);
    $ret;
}
|
---|
| 497 |
|
---|
| 498 | ################################################################
|
---|
package #hide from PAUSE
  XML::Parser::ContentModel;

# Objects of this class are hashes describing an element content model.
# The methods below read the keys Type (one of the constants that follow),
# Tag, Quant and Children.  Objects stringify through asString and compare
# with 'eq' through thiseq.
use overload '""' => \&asString, 'eq' => \&thiseq;

# Values of the Type field.
sub EMPTY ()  {1}
sub ANY ()    {2}
sub MIXED ()  {3}
sub NAME ()   {4}
sub CHOICE () {5}
sub SEQ ()    {6}

# Type predicates: each is true when the model's Type equals the constant.
sub isempty {
    my ($self) = @_;
    return $self->{Type} == EMPTY;
}

sub isany {
    my ($self) = @_;
    return $self->{Type} == ANY;
}

sub ismixed {
    my ($self) = @_;
    return $self->{Type} == MIXED;
}

sub isname {
    my ($self) = @_;
    return $self->{Type} == NAME;
}

# The Tag field.
sub name {
    my ($self) = @_;
    return $self->{Tag};
}

sub ischoice {
    my ($self) = @_;
    return $self->{Type} == CHOICE;
}

sub isseq {
    my ($self) = @_;
    return $self->{Type} == SEQ;
}

# The Quant field.
sub quant {
    my ($self) = @_;
    return $self->{Quant};
}

# The contents of the Children array, or undef when there is none.
sub children {
    my ($self) = @_;
    my $kids = $self->{Children};
    return undef unless defined $kids;
    return @$kids;
}

# Render the model as text: "EMPTY", "ANY", the tag name, "(#PCDATA|a|b)"
# for mixed content, or a parenthesised list of the children joined by "|"
# (choice) or "," (anything else).  A true Quant is appended except for
# EMPTY and ANY.
sub asString {
    my ($self) = @_;
    my $type = $self->{Type};

    return "EMPTY" if $type != NAME and $type == EMPTY;
    return "ANY"   if $type != NAME and $type == ANY;

    my $text;
    if ($type == NAME) {
        $text = $self->{Tag};
    }
    elsif ($type == MIXED) {
        $text = '(#PCDATA';
        foreach my $child (@{ $self->{Children} }) {
            $text .= '|' . $child;
        }
        $text .= ')';
    }
    else {
        my $sep = $type == CHOICE ? '|' : ',';
        my @parts = map { $_->asString } @{ $self->{Children} };
        $text = '(' . join($sep, @parts) . ')';
    }

    $text .= $self->{Quant} if $self->{Quant};
    return $text;
}

# Implementation of the overloaded 'eq': compare the string form of the
# model with the other operand.
sub thiseq {
    my ($self, $other) = @_;
    return $self->asString eq $other;
}
|
---|
| 585 |
|
---|
| 586 | ################################################################
|
---|
package #hide from PAUSE
  XML::Parser::ExpatNB;

# Subclass of XML::Parser::Expat that is fed its input piecewise through
# parse_more and finished with parse_done.  The one-shot parse methods of
# the base class are disabled.

use vars qw(@ISA);
use Carp;

@ISA = qw(XML::Parser::Expat);

# The three one-shot entry points always croak in this class.
sub parse {
    my $class = ref($_[0]);
    croak "parse method not supported in $class";
}

sub parsestring {
    my $class = ref($_[0]);
    croak "parsestring method not supported in $class";
}

sub parsefile {
    my $class = ref($_[0]);
    croak "parsefile method not supported in $class";
}

# Feed another piece of the document to the parser via ParsePartial.
# Marks the parse as running (_State_ = 1) and croaks with the object's
# ErrorMessage when ParsePartial returns false.
sub parse_more {
    my ($self, $data) = @_;

    $self->{_State_} = 1;

    XML::Parser::Expat::ParsePartial($self->{Parser}, $data)
        or croak $self->{ErrorMessage};
}

# Finish the parse via ParseDone.  On failure the parser is released and
# the object's ErrorMessage is croaked.  On success _State_ becomes 2, the
# FinalHandler (if one is set) is called with the object, and the parser is
# released.  The return value is the FinalHandler's result (called in the
# caller's list or scalar context), or without a FinalHandler the ParseDone
# result in scalar context and an empty list in list context.
sub parse_done {
    my ($self) = @_;
    my $want_list = wantarray;

    my $ret = XML::Parser::Expat::ParseDone($self->{Parser});
    if (!$ret) {
        # Fetch the message before the parser is released.
        my $msg = $self->{ErrorMessage};
        $self->release;
        croak $msg;
    }

    $self->{_State_} = 2;

    my $scalar_result = $ret;
    my @list_result   = ();

    my $final = $self->{FinalHandler};
    if (defined $final) {
        if ($want_list) {
            @list_result = $final->($self);
        }
        else {
            $scalar_result = $final->($self);
        }
    }

    $self->release;

    return unless defined $want_list;
    return $want_list ? @list_result : $scalar_result;
}
|
---|
| 651 |
|
---|
| 652 | ################################################################
|
---|
| 653 |
|
---|
package #hide from PAUSE
  XML::Parser::Encinfo;

# Destructor: hand the object to XML::Parser::Expat::FreeEncoding (not
# defined in this file; presumably part of the bootstrapped code).
sub DESTROY {
    my ($self) = @_;
    return XML::Parser::Expat::FreeEncoding($self);
}
|
---|
| 661 |
|
---|
| 662 | 1;
|
---|
| 663 |
|
---|
| 664 | __END__
|
---|
| 665 |
|
---|
| 666 | =head1 NAME
|
---|
| 667 |
|
---|
| 668 | XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser
|
---|
| 669 |
|
---|
| 670 | =head1 SYNOPSIS
|
---|
| 671 |
|
---|
| 672 | use XML::Parser::Expat;
|
---|
| 673 |
|
---|
| 674 | $parser = XML::Parser::Expat->new;
|
---|
| 675 | $parser->setHandlers('Start' => \&sh,
|
---|
| 676 | 'End' => \&eh,
|
---|
| 677 | 'Char' => \&ch);
|
---|
| 678 | open(FOO, '<', 'info.xml') or die "Couldn't open";
|
---|
| 679 | $parser->parse(*FOO);
|
---|
| 680 | close(FOO);
|
---|
| 681 | # $parser->parse('<foo id="me"> here <em>we</em> go </foo>');
|
---|
| 682 |
|
---|
| 683 | sub sh
|
---|
| 684 | {
|
---|
| 685 | my ($p, $el, %atts) = @_;
|
---|
| 686 | $p->setHandlers('Char' => \&spec)
|
---|
| 687 | if ($el eq 'special');
|
---|
| 688 | ...
|
---|
| 689 | }
|
---|
| 690 |
|
---|
| 691 | sub eh
|
---|
| 692 | {
|
---|
| 693 | my ($p, $el) = @_;
|
---|
| 694 | $p->setHandlers('Char' => \&ch) # Special elements won't contain
|
---|
| 695 | if ($el eq 'special'); # other special elements
|
---|
| 696 | ...
|
---|
| 697 | }
|
---|
| 698 |
|
---|
| 699 | =head1 DESCRIPTION
|
---|
| 700 |
|
---|
| 701 | This module provides an interface to James Clark's XML parser, expat. As in
|
---|
| 702 | expat, a single instance of the parser can only parse one document. Calls
|
---|
| 703 | to parsestring after the first for a given instance will die.
|
---|
| 704 |
|
---|
| 705 | Expat (and XML::Parser::Expat) are event based. As the parser recognizes
|
---|
| 706 | parts of the document (say the start or end of an XML element), then any
|
---|
| 707 | handlers registered for that type of an event are called with suitable
|
---|
| 708 | parameters.
|
---|
| 709 |
|
---|
| 710 | =head1 METHODS
|
---|
| 711 |
|
---|
| 712 | =over 4
|
---|
| 713 |
|
---|
| 714 | =item new
|
---|
| 715 |
|
---|
| 716 | This is a class method, the constructor for XML::Parser::Expat. Options are
|
---|
| 717 | passed as keyword value pairs. The recognized options are:
|
---|
| 718 |
|
---|
| 719 | =over 4
|
---|
| 720 |
|
---|
| 721 | =item * ProtocolEncoding
|
---|
| 722 |
|
---|
| 723 | The protocol encoding name. The default is none. The expat built-in
|
---|
| 724 | encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>.
|
---|
| 725 | Other encodings may be used if they have encoding maps in one of the
|
---|
| 726 | directories in the @Encoding_Path list. Setting the protocol encoding
|
---|
| 727 | overrides any encoding in the XML declaration.
|
---|
| 728 |
|
---|
| 729 | =item * Namespaces
|
---|
| 730 |
|
---|
| 731 | When this option is given with a true value, then the parser does namespace
|
---|
| 732 | processing. By default, namespace processing is turned off. When it is
|
---|
| 733 | turned on, the parser consumes I<xmlns> attributes and strips off prefixes
|
---|
| 734 | from element and attribute names where those prefixes have a defined
|
---|
| 735 | namespace. A name's namespace can be found using the L<"namespace"> method
|
---|
| 736 | and two names can be checked for absolute equality with the L<"eq_name">
|
---|
| 737 | method.
|
---|
| 738 |
|
---|
| 739 | =item * NoExpand
|
---|
| 740 |
|
---|
| 741 | Normally, the parser will try to expand references to entities defined in
|
---|
| 742 | the internal subset. If this option is set to a true value, and a default
|
---|
| 743 | handler is also set, then the default handler will be called when an
|
---|
| 744 | entity reference is seen in text. This has no effect if a default handler
|
---|
| 745 | has not been registered, and it has no effect on the expansion of entity
|
---|
| 746 | references inside attribute values.
|
---|
| 747 |
|
---|
| 748 | =item * Stream_Delimiter
|
---|
| 749 |
|
---|
| 750 | This option takes a string value. When this string is found alone on a line
|
---|
| 751 | while parsing from a stream, then the parse is ended as if it saw an end of
|
---|
| 752 | file. The intended use is with a stream of xml documents in a MIME multipart
|
---|
| 753 | format. The string should not contain a trailing newline.
|
---|
| 754 |
|
---|
| 755 | =item * ErrorContext
|
---|
| 756 |
|
---|
| 757 | When this option is defined, errors are reported in context. The value
|
---|
| 758 | of ErrorContext should be the number of lines to show on either side of
|
---|
| 759 | the line in which the error occurred.
|
---|
| 760 |
|
---|
| 761 | =item * ParseParamEnt
|
---|
| 762 |
|
---|
| 763 | Unless standalone is set to "yes" in the XML declaration, setting this to
|
---|
| 764 | a true value allows the external DTD to be read, and parameter entities
|
---|
| 765 | to be parsed and expanded.
|
---|
| 766 |
|
---|
| 767 | =item * Base
|
---|
| 768 |
|
---|
| 769 | The base to use for relative pathnames or URLs. This can also be done by
|
---|
| 770 | using the base method.
|
---|
| 771 |
|
---|
| 772 | =back
|
---|
| 773 |
|
---|
| 774 | =item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
|
---|
| 775 |
|
---|
| 776 | This method registers handlers for the various events. If no handlers are
|
---|
| 777 | registered, then a call to parsestring or parsefile will only determine if
|
---|
| 778 | the corresponding XML document is well formed (by returning without error.)
|
---|
| 779 | This may be called from within a handler, after the parse has started.
|
---|
| 780 |
|
---|
| 781 | Setting a handler to something that evaluates to false unsets that
|
---|
| 782 | handler.
|
---|
| 783 |
|
---|
| 784 | This method returns a list of type, handler pairs corresponding to the
|
---|
| 785 | input. The handlers returned are the ones that were in effect before the
|
---|
| 786 | call to setHandlers.
|
---|
| 787 |
|
---|
| 788 | The recognized events and the parameters passed to the corresponding
|
---|
| 789 | handlers are:
|
---|
| 790 |
|
---|
| 791 | =over 4
|
---|
| 792 |
|
---|
| 793 | =item * Start (Parser, Element [, Attr, Val [,...]])
|
---|
| 794 |
|
---|
| 795 | This event is generated when an XML start tag is recognized. Parser is
|
---|
| 796 | an XML::Parser::Expat instance. Element is the name of the XML element that
|
---|
| 797 | is opened with the start tag. The Attr & Val pairs are generated for each
|
---|
| 798 | attribute in the start tag.
|
---|
| 799 |
|
---|
| 800 | =item * End (Parser, Element)
|
---|
| 801 |
|
---|
| 802 | This event is generated when an XML end tag is recognized. Note that
|
---|
| 803 | an XML empty tag (<foo/>) generates both a start and an end event.
|
---|
| 804 |
|
---|
| 805 | There is always a lower level start and end handler installed that wrap
|
---|
| 806 | the corresponding callbacks. This is to handle the context mechanism.
|
---|
| 807 | A consequence of this is that the default handler (see below) will not
|
---|
| 808 | see a start tag or end tag unless the default_current method is called.
|
---|
| 809 |
|
---|
| 810 | =item * Char (Parser, String)
|
---|
| 811 |
|
---|
| 812 | This event is generated when non-markup is recognized. The non-markup
|
---|
| 813 | sequence of characters is in String. A single non-markup sequence of
|
---|
| 814 | characters may generate multiple calls to this handler. Whatever the
|
---|
| 815 | encoding of the string in the original document, this is given to the
|
---|
| 816 | handler in UTF-8.
|
---|
| 817 |
|
---|
| 818 | =item * Proc (Parser, Target, Data)
|
---|
| 819 |
|
---|
| 820 | This event is generated when a processing instruction is recognized.
|
---|
| 821 |
|
---|
| 822 | =item * Comment (Parser, String)
|
---|
| 823 |
|
---|
| 824 | This event is generated when a comment is recognized.
|
---|
| 825 |
|
---|
| 826 | =item * CdataStart (Parser)
|
---|
| 827 |
|
---|
| 828 | This is called at the start of a CDATA section.
|
---|
| 829 |
|
---|
| 830 | =item * CdataEnd (Parser)
|
---|
| 831 |
|
---|
| 832 | This is called at the end of a CDATA section.
|
---|
| 833 |
|
---|
| 834 | =item * Default (Parser, String)
|
---|
| 835 |
|
---|
| 836 | This is called for any characters that don't have a registered handler.
|
---|
| 837 | This includes both characters that are part of markup for which no
|
---|
| 838 | events are generated (markup declarations) and characters that
|
---|
| 839 | could generate events, but for which no handler has been registered.
|
---|
| 840 |
|
---|
| 841 | Whatever the encoding in the original document, the string is returned to
|
---|
| 842 | the handler in UTF-8.
|
---|
| 843 |
|
---|
| 844 | =item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation)
|
---|
| 845 |
|
---|
| 846 | This is called for a declaration of an unparsed entity. Entity is the name
|
---|
| 847 | of the entity. Base is the base to be used for resolving a relative URI.
|
---|
| 848 | Sysid is the system id. Pubid is the public id. Notation is the notation
|
---|
| 849 | name. Base and Pubid may be undefined.
|
---|
| 850 |
|
---|
| 851 | =item * Notation (Parser, Notation, Base, Sysid, Pubid)
|
---|
| 852 |
|
---|
| 853 | This is called for a declaration of notation. Notation is the notation name.
|
---|
| 854 | Base is the base to be used for resolving a relative URI. Sysid is the system
|
---|
| 855 | id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.
|
---|
| 856 |
|
---|
| 857 | =item * ExternEnt (Parser, Base, Sysid, Pubid)
|
---|
| 858 |
|
---|
| 859 | This is called when an external entity is referenced. Base is the base to be
|
---|
| 860 | used for resolving a relative URI. Sysid is the system id. Pubid is the public
|
---|
| 861 | id. Base and Pubid may be undefined.
|
---|
| 862 |
|
---|
| 863 | This handler should either return a string, which represents the contents of
|
---|
| 864 | the external entity, or return an open filehandle that can be read to obtain
|
---|
| 865 | the contents of the external entity, or return undef, which indicates the
|
---|
| 866 | external entity couldn't be found and will generate a parse error.
|
---|
| 867 |
|
---|
| 868 | If an open filehandle is returned, it must be returned as either a glob
|
---|
| 869 | (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).
|
---|
| 870 |
|
---|
| 871 | =item * ExternEntFin (Parser)
|
---|
| 872 |
|
---|
| 873 | This is called after an external entity has been parsed. It allows
|
---|
| 874 | applications to perform cleanup on actions performed in the above
|
---|
| 875 | ExternEnt handler.
|
---|
| 876 |
|
---|
| 877 | =item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam)
|
---|
| 878 |
|
---|
| 879 | This is called when an entity is declared. For internal entities, the Val
|
---|
| 880 | parameter will contain the value and the remaining three parameters will
|
---|
| 881 | be undefined. For external entities, the Val parameter
|
---|
| 882 | will be undefined, the Sysid parameter will have the system id, the Pubid
|
---|
| 883 | parameter will have the public id if it was provided (it will be undefined
|
---|
| 884 | otherwise), the Ndata parameter will contain the notation for unparsed
|
---|
| 885 | entities. If this is a parameter entity declaration, then the IsParam
|
---|
| 886 | parameter is true.
|
---|
| 887 |
|
---|
| 888 | Note that this handler and the Unparsed handler above overlap. If both are
|
---|
| 889 | set, then this handler will not be called for unparsed entities.
|
---|
| 890 |
|
---|
| 891 | =item * Element (Parser, Name, Model)
|
---|
| 892 |
|
---|
| 893 | The element handler is called when an element declaration is found. Name is
|
---|
| 894 | the element name, and Model is the content model as an
|
---|
| 895 | XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods">
|
---|
| 896 | for methods available for this class.
|
---|
| 897 |
|
---|
| 898 | =item * Attlist (Parser, Elname, Attname, Type, Default, Fixed)
|
---|
| 899 |
|
---|
| 900 | This handler is called for each attribute in an ATTLIST declaration.
|
---|
| 901 | So an ATTLIST declaration that has multiple attributes
|
---|
| 902 | will generate multiple calls to this handler. The Elname parameter is the
|
---|
| 903 | name of the element with which the attribute is being associated. The Attname
|
---|
| 904 | parameter is the name of the attribute. Type is the attribute type, given as
|
---|
| 905 | a string. Default is the default value, which will either be "#REQUIRED",
|
---|
| 906 | "#IMPLIED" or a quoted string (i.e. the returned string will begin and end
|
---|
| 907 | with a quote character). If Fixed is true, then this is a fixed attribute.
|
---|
| 908 |
|
---|
| 909 | =item * Doctype (Parser, Name, Sysid, Pubid, Internal)
|
---|
| 910 |
|
---|
| 911 | This handler is called for DOCTYPE declarations. Name is the document type
|
---|
| 912 | name. Sysid is the system id of the document type, if it was provided,
|
---|
| 913 | otherwise it's undefined. Pubid is the public id of the document type,
|
---|
| 914 | which will be undefined if no public id was given. Internal will be
|
---|
| 915 | true or false, indicating whether or not the doctype declaration contains
|
---|
| 916 | an internal subset.
|
---|
| 917 |
|
---|
| 918 | =item * DoctypeFin (Parser)
|
---|
| 919 |
|
---|
| 920 | This handler is called after parsing of the DOCTYPE declaration has finished,
|
---|
| 921 | including any internal or external DTD declarations.
|
---|
| 922 |
|
---|
| 923 | =item * XMLDecl (Parser, Version, Encoding, Standalone)
|
---|
| 924 |
|
---|
| 925 | This handler is called for XML declarations. Version is a string containing
|
---|
| 926 | the version. Encoding is either undefined or contains an encoding string.
|
---|
| 927 | Standalone is either undefined, or true or false. Undefined indicates
|
---|
| 928 | that no standalone parameter was given in the XML declaration. True or
|
---|
| 929 | false indicates "yes" or "no" respectively.
|
---|
| 930 |
|
---|
| 931 | =back
|
---|
| 932 |
|
---|
| 933 | =item namespace(name)
|
---|
| 934 |
|
---|
| 935 | Return the URI of the namespace that the name belongs to. If the name doesn't
|
---|
| 936 | belong to any namespace, an undef is returned. This is only valid on names
|
---|
| 937 | received through the Start or End handlers from a single document, or through
|
---|
| 938 | a call to the generate_ns_name method. In other words, don't use names
|
---|
| 939 | generated from one instance of XML::Parser::Expat with other instances.
|
---|
| 940 |
|
---|
| 941 | =item eq_name(name1, name2)
|
---|
| 942 |
|
---|
| 943 | Return true if name1 and name2 are identical (i.e. same name and from
|
---|
| 944 | the same namespace.) This is only meaningful if both names were obtained
|
---|
| 945 | through the Start or End handlers from a single document, or through
|
---|
| 946 | a call to the generate_ns_name method.
|
---|
| 947 |
|
---|
| 948 | =item generate_ns_name(name, namespace)
|
---|
| 949 |
|
---|
| 950 | Return a name, associated with a given namespace, good for using with the
|
---|
| 951 | above 2 methods. The namespace argument should be the namespace URI, not
|
---|
| 952 | a prefix.
|
---|
| 953 |
|
---|
| 954 | =item new_ns_prefixes
|
---|
| 955 |
|
---|
| 956 | When called from a start tag handler, returns namespace prefixes declared
|
---|
| 957 | with this start tag. If called elsewhere (or if there were no namespace
|
---|
| 958 | prefixes declared), it returns an empty list. Setting of the default
|
---|
| 959 | namespace is indicated with '#default' as a prefix.
|
---|
| 960 |
|
---|
| 961 | =item expand_ns_prefix(prefix)
|
---|
| 962 |
|
---|
| 963 | Return the uri to which the given prefix is currently bound. Returns
|
---|
| 964 | undef if the prefix isn't currently bound. Use '#default' to find the
|
---|
| 965 | current binding of the default namespace (if any).
|
---|
| 966 |
|
---|
| 967 | =item current_ns_prefixes
|
---|
| 968 |
|
---|
| 969 | Return a list of currently bound namespace prefixes. The order of the
|
---|
| 970 | prefixes in the list has no meaning. If the default namespace is
|
---|
| 971 | currently bound, '#default' appears in the list.
|
---|
| 972 |
|
---|
| 973 | =item recognized_string
|
---|
| 974 |
|
---|
| 975 | Returns the string from the document that was recognized in order to call
|
---|
| 976 | the current handler. For instance, when called from a start handler, it
|
---|
| 977 | will give us the start-tag string. The string is encoded in UTF-8.
|
---|
| 978 | This method doesn't return a meaningful string inside declaration handlers.
|
---|
| 979 |
|
---|
| 980 | =item original_string
|
---|
| 981 |
|
---|
| 982 | Returns the verbatim string from the document that was recognized in
|
---|
| 983 | order to call the current handler. The string is in the original document
|
---|
| 984 | encoding. This method doesn't return a meaningful string inside declaration
|
---|
| 985 | handlers.
|
---|
| 986 |
|
---|
| 987 | =item default_current
|
---|
| 988 |
|
---|
| 989 | When called from a handler, causes the sequence of characters that generated
|
---|
| 990 | the corresponding event to be sent to the default handler (if one is
|
---|
| 991 | registered). Use of this method is deprecated in favor of the recognized_string
|
---|
| 992 | method, which you can use without installing a default handler. This
|
---|
| 993 | method doesn't deliver a meaningful string to the default handler when
|
---|
| 994 | called from inside declaration handlers.
|
---|
| 995 |
|
---|
| 996 | =item xpcroak(message)
|
---|
| 997 |
|
---|
| 998 | Concatenate onto the given message the current line number within the
|
---|
| 999 | XML document plus the message implied by ErrorContext. Then croak with
|
---|
| 1000 | the formed message.
|
---|
| 1001 |
|
---|
| 1002 | =item xpcarp(message)
|
---|
| 1003 |
|
---|
| 1004 | Concatenate onto the given message the current line number within the
|
---|
| 1005 | XML document plus the message implied by ErrorContext. Then carp with
|
---|
| 1006 | the formed message.
|
---|
| 1007 |
|
---|
| 1008 | =item current_line
|
---|
| 1009 |
|
---|
| 1010 | Returns the line number of the current position of the parse.
|
---|
| 1011 |
|
---|
| 1012 | =item current_column
|
---|
| 1013 |
|
---|
| 1014 | Returns the column number of the current position of the parse.
|
---|
| 1015 |
|
---|
| 1016 | =item current_byte
|
---|
| 1017 |
|
---|
| 1018 | Returns the current position of the parse.
|
---|
| 1019 |
|
---|
| 1020 | =item base([NEWBASE]);
|
---|
| 1021 |
|
---|
| 1022 | Returns the current value of the base for resolving relative URIs. If
|
---|
| 1023 | NEWBASE is supplied, changes the base to that value.
|
---|
| 1024 |
|
---|
| 1025 | =item context
|
---|
| 1026 |
|
---|
| 1027 | Returns a list of element names that represent open elements, with the
|
---|
| 1028 | last one being the innermost. Inside start and end tag handlers, this
|
---|
| 1029 | will be the tag of the parent element.
|
---|
| 1030 |
|
---|
| 1031 | =item current_element
|
---|
| 1032 |
|
---|
| 1033 | Returns the name of the innermost currently opened element. Inside
|
---|
| 1034 | start or end handlers, returns the parent of the element associated
|
---|
| 1035 | with those tags.
|
---|
| 1036 |
|
---|
| 1037 | =item in_element(NAME)
|
---|
| 1038 |
|
---|
| 1039 | Returns true if NAME is equal to the name of the innermost currently opened
|
---|
| 1040 | element. If namespace processing is being used and you want to check
|
---|
| 1041 | against a name that may be in a namespace, then use the generate_ns_name
|
---|
| 1042 | method to create the NAME argument.
|
---|
| 1043 |
|
---|
| 1044 | =item within_element(NAME)
|
---|
| 1045 |
|
---|
| 1046 | Returns the number of times the given name appears in the context list.
|
---|
| 1047 | If namespace processing is being used and you want to check
|
---|
| 1048 | against a name that may be in a namespace, then use the generate_ns_name
|
---|
| 1049 | method to create the NAME argument.
|
---|
| 1050 |
|
---|
| 1051 | =item depth
|
---|
| 1052 |
|
---|
| 1053 | Returns the size of the context list.
|
---|
| 1054 |
|
---|
| 1055 | =item element_index
|
---|
| 1056 |
|
---|
| 1057 | Returns an integer that is the depth-first visit order of the current
|
---|
| 1058 | element. This will be zero outside of the root element. For example,
|
---|
| 1059 | this will return 1 when called from the start handler for the root element
|
---|
| 1060 | start tag.
|
---|
| 1061 |
|
---|
| 1062 | =item skip_until(INDEX)
|
---|
| 1063 |
|
---|
| 1064 | INDEX is an integer that represents an element index. When this method
|
---|
| 1065 | is called, all handlers are suspended until the start tag for an element
|
---|
| 1066 | that has an index number equal to INDEX is seen. If a start handler has
|
---|
| 1067 | been set, then this is the first tag that the start handler will see
|
---|
| 1068 | after skip_until has been called.
|
---|
| 1069 |
|
---|
| 1070 |
|
---|
| 1071 | =item position_in_context(LINES)
|
---|
| 1072 |
|
---|
| 1073 | Returns a string that shows the current parse position. LINES should be
|
---|
| 1074 | an integer >= 0 that represents the number of lines on either side of the
|
---|
| 1075 | current parse line to place into the returned string.
|
---|
| 1076 |
|
---|
| 1077 | =item xml_escape(TEXT [, CHAR [, CHAR ...]])
|
---|
| 1078 |
|
---|
| 1079 | Returns TEXT with markup characters turned into character entities. Any
|
---|
| 1080 | additional characters provided as arguments are also turned into character
|
---|
| 1081 | references where found in TEXT.
|
---|
| 1082 |
|
---|
| 1083 | =item parse (SOURCE)
|
---|
| 1084 |
|
---|
| 1085 | The SOURCE parameter should either be a string containing the whole XML
|
---|
| 1086 | document, or it should be an open IO::Handle. Only a single document
|
---|
| 1087 | may be parsed for a given instance of XML::Parser::Expat, so this will croak
|
---|
| 1088 | if it's been called previously for this instance.
|
---|
| 1089 |
|
---|
| 1090 | =item parsestring(XML_DOC_STRING)
|
---|
| 1091 |
|
---|
| 1092 | Parses the given string as an XML document. Only a single document may be
|
---|
| 1093 | parsed for a given instance of XML::Parser::Expat, so this will die if either
|
---|
| 1094 | parsestring or parsefile has been called for this instance previously.
|
---|
| 1095 |
|
---|
| 1096 | This method is deprecated in favor of the parse method.
|
---|
| 1097 |
|
---|
| 1098 | =item parsefile(FILENAME)
|
---|
| 1099 |
|
---|
| 1100 | Parses the XML document in the given file. Will die if parsestring or
|
---|
| 1101 | parsefile has been called previously for this instance.
|
---|
| 1102 |
|
---|
| 1103 | =item is_defaulted(ATTNAME)
|
---|
| 1104 |
|
---|
| 1105 | NO LONGER WORKS. To find out if an attribute is defaulted please use
|
---|
| 1106 | the specified_attr method.
|
---|
| 1107 |
|
---|
| 1108 | =item specified_attr
|
---|
| 1109 |
|
---|
| 1110 | When the start handler receives lists of attributes and values, the
|
---|
| 1111 | non-defaulted (i.e. explicitly specified) attributes occur in the list
|
---|
| 1112 | first. This method returns the number of specified items in the list.
|
---|
| 1113 | So if this number is equal to the length of the list, there were no
|
---|
| 1114 | defaulted values. Otherwise the number points to the index of the
|
---|
| 1115 | first defaulted attribute name.
|
---|
| 1116 |
|
---|
| 1117 | =item finish
|
---|
| 1118 |
|
---|
| 1119 | Unsets all handlers (including internal ones that set context), but expat
|
---|
| 1120 | continues parsing to the end of the document or until it finds an error.
|
---|
| 1121 | It should finish up a lot faster than with the handlers set.
|
---|
| 1122 |
|
---|
| 1123 | =item release
|
---|
| 1124 |
|
---|
| 1125 | There are data structures used by XML::Parser::Expat that have circular
|
---|
| 1126 | references. This means that these structures will never be garbage
|
---|
| 1127 | collected unless these references are explicitly broken. Calling this
|
---|
| 1128 | method breaks those references (and makes the instance unusable.)
|
---|
| 1129 |
|
---|
| 1130 | Normally, higher level calls handle this for you, but if you are using
|
---|
| 1131 | XML::Parser::Expat directly, then it's your responsibility to call it.
|
---|
| 1132 |
|
---|
| 1133 | =back
|
---|
| 1134 |
|
---|
| 1135 | =head2 XML::Parser::ContentModel Methods
|
---|
| 1136 |
|
---|
| 1137 | The element declaration handlers are passed objects of this class as the
|
---|
| 1138 | content model of the element declaration. They also represent content
|
---|
| 1139 | particles, components of a content model.
|
---|
| 1140 |
|
---|
| 1141 | When referred to as a string, these objects are automagically converted to a
|
---|
| 1142 | string representation of the model (or content particle).
|
---|
| 1143 |
|
---|
| 1144 | =over 4
|
---|
| 1145 |
|
---|
| 1146 | =item isempty
|
---|
| 1147 |
|
---|
| 1148 | This method returns true if the object is "EMPTY", false otherwise.
|
---|
| 1149 |
|
---|
| 1150 | =item isany
|
---|
| 1151 |
|
---|
| 1152 | This method returns true if the object is "ANY", false otherwise.
|
---|
| 1153 |
|
---|
| 1154 | =item ismixed
|
---|
| 1155 |
|
---|
| 1156 | This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*",
|
---|
| 1157 | false otherwise.
|
---|
| 1158 |
|
---|
| 1159 | =item isname
|
---|
| 1160 |
|
---|
| 1161 | This method returns true if the object is an element name.
|
---|
| 1162 |
|
---|
| 1163 | =item ischoice
|
---|
| 1164 |
|
---|
| 1165 | This method returns true if the object is a choice of content particles.
|
---|
| 1166 |
|
---|
| 1167 |
|
---|
| 1168 | =item isseq
|
---|
| 1169 |
|
---|
| 1170 | This method returns true if the object is a sequence of content particles.
|
---|
| 1171 |
|
---|
| 1172 | =item quant
|
---|
| 1173 |
|
---|
| 1174 | This method returns undef or a string representing the quantifier
|
---|
| 1175 | ('?', '*', '+') associated with the model or particle.
|
---|
| 1176 |
|
---|
| 1177 | =item children
|
---|
| 1178 |
|
---|
| 1179 | This method returns undef or (for mixed, choice, and sequence types)
|
---|
| 1180 | an array of component content particles. There will always be at least
|
---|
| 1181 | one component for choices and sequences, but for a mixed content model
|
---|
| 1182 | of pure PCDATA, "(#PCDATA)", then an undef is returned.
|
---|
| 1183 |
|
---|
| 1184 | =back
|
---|
| 1185 |
|
---|
| 1186 | =head2 XML::Parser::ExpatNB Methods
|
---|
| 1187 |
|
---|
| 1188 | The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used
|
---|
| 1189 | for non-blocking access to the expat library. It does not support the parse,
|
---|
| 1190 | parsestring, or parsefile methods, but it does have these additional methods:
|
---|
| 1191 |
|
---|
| 1192 | =over 4
|
---|
| 1193 |
|
---|
| 1194 | =item parse_more(DATA)
|
---|
| 1195 |
|
---|
| 1196 | Feed expat more text to munch on.
|
---|
| 1197 |
|
---|
| 1198 | =item parse_done
|
---|
| 1199 |
|
---|
| 1200 | Tell expat that it's gotten the whole document.
|
---|
| 1201 |
|
---|
| 1202 | =back
|
---|
| 1203 |
|
---|
| 1204 | =head1 FUNCTIONS
|
---|
| 1205 |
|
---|
| 1206 | =over 4
|
---|
| 1207 |
|
---|
| 1208 | =item XML::Parser::Expat::load_encoding(ENCODING)
|
---|
| 1209 |
|
---|
| 1210 | Load an external encoding. ENCODING is either the name of an encoding or
|
---|
| 1211 | the name of a file. The basename is converted to lowercase and a '.enc'
|
---|
| 1212 | extension is appended unless there's one already there. Then, unless
|
---|
| 1213 | it's an absolute pathname (i.e. begins with '/'), the first file by that
|
---|
| 1214 | name discovered in the @Encoding_Path path list is used.
|
---|
| 1215 |
|
---|
| 1216 | The encoding in the file is loaded and kept in the %Encoding_Table
|
---|
| 1217 | table. Earlier encodings of the same name are replaced.
|
---|
| 1218 |
|
---|
| 1219 | This function is automatically called by expat when it encounters an encoding
|
---|
| 1220 | it doesn't know about. Expat shouldn't call this twice for the same
|
---|
| 1221 | encoding name. The only reason users should use this function is to
|
---|
| 1222 | explicitly load an encoding not contained in the @Encoding_Path list.
|
---|
| 1223 |
|
---|
| 1224 | =back
|
---|
| 1225 |
|
---|
| 1226 | =head1 AUTHORS
|
---|
| 1227 |
|
---|
| 1228 | Larry Wall <F<[email protected]>> wrote version 1.0.
|
---|
| 1229 |
|
---|
| 1230 | Clark Cooper <F<[email protected]>> picked up support, changed the API
|
---|
| 1231 | for this version (2.x), provided documentation, and added some standard
|
---|
| 1232 | package features.
|
---|
| 1233 |
|
---|
| 1234 | =cut
|
---|