1 | package XML::Parser::Expat;
|
---|
2 |
|
---|
3 | require 5.004;
|
---|
4 |
|
---|
5 | use strict;
|
---|
6 | use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path
|
---|
7 | $have_File_Spec);
|
---|
8 | use Carp;
|
---|
9 |
|
---|
10 | require DynaLoader;
|
---|
11 |
|
---|
12 | @ISA = qw(DynaLoader);
|
---|
13 | $VERSION = "2.41";
|
---|
14 |
|
---|
# Use File::Spec when it is already loaded or can be loaded; the flag decides
# below (and in load_encoding) between portable and plain '/' path joins.
$have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm';

%Encoding_Table = ();

# Search path for encoding maps: every existing XML/Parser/Encodings
# directory under @INC, followed by the current directory.
{
    my ($here, @candidates);
    if ($have_File_Spec) {
        $here       = File::Spec->curdir;
        @candidates = map { File::Spec->catdir($_, qw(XML Parser Encodings)) } @INC;
    }
    else {
        $here       = '.';
        @candidates = map { $_ . '/XML/Parser/Encodings' } @INC;
    }
    @Encoding_Path = ((grep { -d $_ } @candidates), $here);
}
|
---|
27 |
|
---|
28 |
|
---|
29 | bootstrap XML::Parser::Expat $VERSION;
|
---|
30 |
|
---|
# Maps each handler type name accepted by setHandlers to the function that
# installs a callback of that type on the underlying parser.
%Handler_Setters = (
    Start        => \&SetStartElementHandler,
    End          => \&SetEndElementHandler,
    Char         => \&SetCharacterDataHandler,
    Proc         => \&SetProcessingInstructionHandler,
    Comment      => \&SetCommentHandler,
    CdataStart   => \&SetStartCdataHandler,
    CdataEnd     => \&SetEndCdataHandler,
    Default      => \&SetDefaultHandler,
    Unparsed     => \&SetUnparsedEntityDeclHandler,
    Notation     => \&SetNotationDeclHandler,
    ExternEnt    => \&SetExternalEntityRefHandler,
    ExternEntFin => \&SetExtEntFinishHandler,
    Entity       => \&SetEntityDeclHandler,
    Element      => \&SetElementDeclHandler,
    Attlist      => \&SetAttListDeclHandler,
    Doctype      => \&SetDoctypeHandler,
    DoctypeFin   => \&SetEndDoctypeHandler,
    XMLDecl      => \&SetXMLDeclHandler,
);
|
---|
51 |
|
---|
# Constructor. Options arrive as key/value pairs and become the object's
# own hash; bookkeeping fields are then initialised and the underlying
# parser is created with ParserCreate.
sub new {
    my ($class, %args) = @_;

    my $self = bless \%args, $class;

    $self->{_State_}        = 0;     # parse() moves this to 1, then 2
    $self->{Context}        = [];
    $self->{Namespaces}   ||= 0;
    $self->{ErrorMessage} ||= '';

    # Namespace bookkeeping is only set up when namespace processing is on.
    if ($self->{Namespaces}) {
        $self->{Namespace_Table} = {};
        $self->{Namespace_List}  = [undef];
        $self->{Prefix_Table}    = {};
        $self->{New_Prefixes}    = [];
    }

    $self->{_Setters} = \%Handler_Setters;
    $self->{Parser}   = ParserCreate($self, $self->{ProtocolEncoding},
                                     $self->{Namespaces});

    return $self;
}
|
---|
70 |
|
---|
# Load an encoding map and register it via LoadEncoding.
#
# $file is an encoding name or path. Its last path component is lowercased
# and '.enc' is appended if missing. Unless the result starts with '/', each
# directory in @Encoding_Path is tried and the first existing file is used.
#
# Returns the name reported by LoadEncoding. Croaks if the file cannot be
# opened or read, or if LoadEncoding does not recognise it.
sub load_encoding {
    my ($file) = @_;

    $file =~ s!([^/]+)$!\L$1\E!;
    $file .= '.enc' unless $file =~ /\.enc$/;
    unless ($file =~ m!^/!) {
        foreach my $dir (@Encoding_Path) {
            my $candidate = ($have_File_Spec
                             ? File::Spec->catfile($dir, $file)
                             : "$dir/$file");
            if (-e $candidate) {
                $file = $candidate;
                last;
            }
        }
    }

    # Three-argument open: the name is always treated as a plain file to
    # read, never as a mode prefix or a pipe. The lexical handle is closed
    # automatically if we croak below.
    open(my $enc_fh, '<', $file)
      or croak("Couldn't open encmap $file:\n$!\n");
    binmode($enc_fh);

    my $data;
    my $br = sysread($enc_fh, $data, -s $enc_fh);
    croak("Trouble reading $file:\n$!\n")
      unless defined($br);
    close($enc_fh);

    my $name = LoadEncoding($data, $br);
    croak("$file isn't an encmap file")
      unless defined($name);

    return $name;
}    # End load_encoding
|
---|
103 |
|
---|
# Register handlers given as (TYPE, HANDLER) pairs.
#
# A handler must be a CODE ref, or something false/undefined. For each pair
# the setter registered under $self->{_Setters}{TYPE} is called with the
# parser and the handler. Returns a flat list of (TYPE, value returned by
# the setter) in input order. Croaks on an odd argument count, a non-code
# handler, or an unknown type.
sub setHandlers {
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
      if @handler_pairs % 2;

    my @previous;

    while (my ($type, $handler) = splice(@handler_pairs, 0, 2)) {
        my $acceptable = !defined($handler)
          || !$handler
          || ref($handler) eq 'CODE';
        croak "Handler for $type not a Code ref" unless $acceptable;

        my $setter = $self->{_Setters}->{$type};
        if (!defined $setter) {
            my @types = sort keys %{$self->{_Setters}};
            croak("Unknown Expat handler type: $type\n Valid types: @types");
        }

        my $old = $setter->($self->{Parser}, $handler);
        push @previous, $type, $old;
    }

    return @previous;
}
|
---|
131 |
|
---|
# Croak with $message, suffixed by the parser's current line number and,
# when the ErrorContext option is defined, by the surrounding input as
# rendered by position_in_context.
sub xpcroak {
    my ($self, $message) = @_;

    my $context_lines = $self->{ErrorContext};
    my $line_no       = GetCurrentLineNumber($self->{Parser});

    $message .= " at line $line_no";
    if (defined $context_lines) {
        $message .= ":\n" . $self->position_in_context($context_lines);
    }

    croak $message;
}
|
---|
143 |
|
---|
# Like xpcroak, but only warns (carp): the message gets the current line
# number and, when ErrorContext is defined, the surrounding input.
sub xpcarp {
    my ($self, $message) = @_;

    my $context_lines = $self->{ErrorContext};
    my $line_no       = GetCurrentLineNumber($self->{Parser});

    $message .= " at line $line_no";
    if (defined $context_lines) {
        $message .= ":\n" . $self->position_in_context($context_lines);
    }

    carp $message;
}
|
---|
154 |
|
---|
# Calls DefaultCurrent on the underlying parser, but only while a parse is
# running (_State_ == 1); otherwise nothing is called.
sub default_current {
    my ($self) = @_;
    return DefaultCurrent($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
161 |
|
---|
# Returns RecognizedString for the underlying parser, but only while a
# parse is running (_State_ == 1).
sub recognized_string {
    my ($self) = @_;
    return RecognizedString($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
168 |
|
---|
# Returns OriginalString for the underlying parser, but only while a parse
# is running (_State_ == 1).
sub original_string {
    my ($self) = @_;
    return OriginalString($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
175 |
|
---|
# Returns the parser's current line number (GetCurrentLineNumber), but only
# while a parse is running (_State_ == 1).
sub current_line {
    my ($self) = @_;
    return GetCurrentLineNumber($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
182 |
|
---|
# Returns the parser's current column number (GetCurrentColumnNumber), but
# only while a parse is running (_State_ == 1).
sub current_column {
    my ($self) = @_;
    return GetCurrentColumnNumber($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
189 |
|
---|
# Returns the parser's current byte index (GetCurrentByteIndex), but only
# while a parse is running (_State_ == 1).
sub current_byte {
    my ($self) = @_;
    return GetCurrentByteIndex($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
196 |
|
---|
# Get, and optionally set, the parser's base. The value in effect before
# the call is always returned; a new base is installed only when an
# argument was actually passed (even an undefined one).
sub base {
    my ($self, $newbase) = @_;

    my $parser   = $self->{Parser};
    my $previous = GetBase($parser);

    if (@_ > 1) {
        SetBase($parser, $newbase);
    }

    return $previous;
}
|
---|
204 |
|
---|
# Returns the contents of the Context array (its element count in scalar
# context).
sub context {
    my ($self) = @_;
    return @{ $self->{Context} };
}
|
---|
209 |
|
---|
# Returns the last entry of the Context array, or undef when it is empty.
sub current_element {
    my ($self) = @_;
    my $open = $self->{Context};
    return @$open ? $open->[-1] : undef;
}
|
---|
214 |
|
---|
# Compares the last entry of the Context array with $element using
# eq_name. Returns undef when the Context array is empty.
sub in_element {
    my ($self, $element) = @_;

    my $open = $self->{Context};
    return undef unless @$open;

    return $self->eq_name($open->[-1], $element);
}
|
---|
220 |
|
---|
# Counts the entries of the Context array that match $element according to
# eq_name.
sub within_element {
    my ($self, $element) = @_;
    return scalar grep { $self->eq_name($_, $element) } @{ $self->{Context} };
}
|
---|
229 |
|
---|
# Returns the number of entries in the Context array.
sub depth {
    my ($self) = @_;
    return scalar @{ $self->{Context} };
}
|
---|
234 |
|
---|
# Returns ElementIndex for the underlying parser, but only while a parse is
# running (_State_ == 1).
sub element_index {
    my ($self) = @_;
    return ElementIndex($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
242 |
|
---|
243 | ################
|
---|
244 | # Namespace methods
|
---|
245 |
|
---|
# Looks up the Namespace_List entry whose index is the numeric value of
# $name. Warnings are silenced because $name is numified with int().
sub namespace {
    my ($self, $name) = @_;
    local $^W = 0;
    my $index = int($name);
    return $self->{Namespace_List}->[$index];
}
|
---|
251 |
|
---|
# Two names are equal when both their numeric values and their string
# values agree. Warnings are silenced because the names are numified.
sub eq_name {
    my ($self, $nm1, $nm2) = @_;
    local $^W = 0;

    return (int($nm1) == int($nm2)) && ($nm1 eq $nm2);
}
|
---|
258 |
|
---|
# With a true $namespace, delegates to GenerateNSName using this object's
# Namespace_Table and Namespace_List; otherwise returns $name unchanged.
sub generate_ns_name {
    my ($self, $name, $namespace) = @_;

    return $name unless $namespace;

    return GenerateNSName($name, $namespace,
                          $self->{Namespace_Table},
                          $self->{Namespace_List});
}
|
---|
267 |
|
---|
# Returns the contents of New_Prefixes when namespace processing is on,
# and an empty list otherwise.
sub new_ns_prefixes {
    my ($self) = @_;
    return $self->{Namespaces} ? @{ $self->{New_Prefixes} } : ();
}
|
---|
275 |
|
---|
# Returns the most recently pushed entry on $prefix's stack in
# Prefix_Table. Returns undef when namespace processing is off or the
# prefix has no (non-empty) stack.
sub expand_ns_prefix {
    my ($self, $prefix) = @_;

    return undef unless $self->{Namespaces};

    my $stack = $self->{Prefix_Table}->{$prefix};
    return undef unless (defined($stack) && @$stack);

    return $stack->[-1];
}
|
---|
286 |
|
---|
# Returns the keys of Prefix_Table when namespace processing is on, and an
# empty list otherwise. '#default' is left out when the last entry on its
# stack is undefined.
sub current_ns_prefixes {
    my ($self) = @_;

    return () unless $self->{Namespaces};

    # Work on a shallow copy so the real table is never modified.
    my %active = %{ $self->{Prefix_Table} };

    delete $active{'#default'}
      if exists $active{'#default'}
      and not defined($active{'#default'}->[-1]);

    return keys %active;
}
|
---|
302 |
|
---|
303 |
|
---|
304 | ################################################################
|
---|
305 | # Namespace declaration handlers
|
---|
306 | #
|
---|
307 |
|
---|
# Records a namespace declaration: pushes $uri onto the stack kept for
# $prefix in Prefix_Table (creating the stack if needed) and appends the
# prefix to New_Prefixes. An undefined prefix is stored as '#default'.
sub NamespaceStart {
    my ($self, $prefix, $uri) = @_;

    $prefix = '#default' unless defined $prefix;

    my $table = $self->{Prefix_Table};
    $table->{$prefix} = [] unless defined $table->{$prefix};
    push @{ $table->{$prefix} }, $uri;

    # The New_Prefixes list gets emptied at end of startElement function
    # in Expat.xs

    push(@{$self->{New_Prefixes}}, $prefix);
}
|
---|
326 |
|
---|
# Ends the scope of a namespace declaration: pops the top entry off the
# stack for $prefix, or removes the prefix from Prefix_Table altogether
# when that was its only entry. An undefined prefix means '#default'.
sub NamespaceEnd {
    my ($self, $prefix) = @_;

    $prefix = '#default' unless defined $prefix;

    my $table = $self->{Prefix_Table};
    my $stack = $table->{$prefix};

    return @$stack > 1
      ? pop(@$stack)
      : delete $table->{$prefix};
}
|
---|
340 |
|
---|
341 | ################
|
---|
342 |
|
---|
# Returns GetSpecifiedAttributeCount for the underlying parser, but only
# while a parse is running (_State_ == 1).
sub specified_attr {
    my ($self) = @_;
    return GetSpecifiedAttributeCount($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
350 |
|
---|
# While a parse is running (_State_ == 1), calls UnsetAllHandlers on the
# underlying parser. Does nothing in any other state.
sub finish {
    my ($self) = @_;
    UnsetAllHandlers($self->{Parser}) if $self->{_State_} == 1;
}
|
---|
358 |
|
---|
# While a parse is running (_State_ == 1), returns the text obtained from
# PositionContext with a pointer line ('=' repeated up to the current
# column, then '^') inserted at the offset PositionContext reported, or
# appended when that offset is not inside the text. Returns '' when
# PositionContext yields no text.
sub position_in_context {
    my ($self, $lines) = @_;

    if ($self->{_State_} == 1) {
        my $parser = $self->{Parser};
        my ($text, $split_at) = PositionContext($parser, $lines);

        return '' if !defined($text);

        my $column  = GetCurrentColumnNumber($parser);
        my $pointer = ('=' x ($column - 1)) . '^' . "\n";

        # Decide where the pointer goes before a newline is appended,
        # since appending changes the length of the text.
        my $split_inside = $split_at < length($text);

        $text .= "\n" if $text !~ /\n$/;

        return $split_inside
          ? substr($text, 0, $split_at) . $pointer . substr($text, $split_at)
          : $text . $pointer;
    }
}
|
---|
384 |
|
---|
# Return a copy of TEXT that is safe to embed in XML.
#
#   $escaped = $parser->xml_escape(TEXT [, CHAR [, CHAR ...]]);
#
# '&' and '<' are always replaced by their entities. Every additional CHAR
# is escaped as well: '>', '"' and "'" become their named entities, any
# other character becomes a hexadecimal character reference (&#xHH;).
# Croaks if a CHAR is longer than one character.
sub xml_escape {
    my $self = shift;
    my $text = shift;

    # '&' must be handled first so the entities inserted below are not
    # escaped a second time.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;

    foreach my $char (@_) {
        # An empty or undefined "character" has nothing to escape, and an
        # empty pattern would silently reuse the last successful regex.
        next unless defined($char) && length($char);

        croak "xml_escape: '$char' isn't a single character"
          if length($char) > 1;

        my $rep =
            $char eq '>' ? '&gt;'
          : $char eq '"' ? '&quot;'
          : $char eq "'" ? '&apos;'
          :                sprintf('&#x%X;', ord($char));

        # \Q..\E quotes regex metacharacters; /g replaces every occurrence.
        $text =~ s/\Q$char\E/$rep/g;
    }

    return $text;
}
|
---|
417 |
|
---|
# Calls SkipUntil on the underlying parser with the given argument, unless
# the parse has already finished (_State_ greater than 1).
sub skip_until {
    my $self = shift;
    SkipUntil($self->{Parser}, $_[0]) if $self->{_State_} <= 1;
}
|
---|
424 |
|
---|
# Calls ParserRelease on the underlying parser.
sub release {
    my ($self) = @_;
    return ParserRelease($self->{Parser});
}
|
---|
429 |
|
---|
# Destructor: calls ParserFree on the underlying parser.
sub DESTROY {
    my ($self) = @_;
    return ParserFree($self->{Parser});
}
|
---|
434 |
|
---|
# Parse a document given as a string or as a filehandle.
#
# The argument is treated as a stream when it is an IO::Handle object,
# a tied handle on perls older than 5.8, or something whose glob has an
# IO slot; anything else (including undef) is handed to ParseString.
# With the Stream_Delimiter option set, the IO class's input record
# separator is switched to "\n<delimiter>\n" for the duration of
# ParseStream. _State_ is 1 while parsing and 2 afterwards.
#
# Returns the true result of the parse; croaks with ErrorMessage when
# the result is false, or when a parse was already started.
sub parse {
    my ($self, $arg) = @_;

    croak "Parse already in progress (Expat)" if $self->{_State_};
    $self->{_State_} = 1;

    my $parser = $self->{Parser};
    my $result = 0;
    my $ioref;

    if (defined $arg) {
        local *@;    # keeps the eval below from disturbing the caller's $@
        if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
            $ioref = $arg;
        }
        elsif ($] < 5.008 and defined tied($arg)) {
            require IO::Handle;
            $ioref = $arg;
        }
        else {
            require IO::Handle;
            eval {
                no strict 'refs';
                $ioref = *{$arg}{IO} if defined *{$arg};
            };
        }
    }

    if (!defined($ioref)) {
        $result = ParseString($parser, $arg);
    }
    else {
        my $delim   = $self->{Stream_Delimiter};
        my $ioclass = ref $ioref;
        $ioclass = "IO::Handle" if !length $ioclass;

        my $prev_rs;
        if (defined($delim)) {
            $prev_rs = $ioclass->input_record_separator("\n$delim\n");
        }

        $result = ParseStream($parser, $ioref, $delim);

        if (defined($delim)) {
            $ioclass->input_record_separator($prev_rs);
        }
    }

    $self->{_State_} = 2;
    $result or croak $self->{ErrorMessage};
}
|
---|
481 |
|
---|
# Forwards all arguments to parse and returns its result.
sub parsestring {
    my ($self, @args) = @_;
    return $self->parse(@args);
}
|
---|
486 |
|
---|
# Open the named file in binary mode, parse it with parse() and return
# the result. Croaks if the parser has already been used or the file
# cannot be opened.
sub parsefile {
    my $self = shift;
    croak "Parser has already been used" if $self->{_State_};

    local (*FILE);

    # Three-argument open: the name is always a plain file to read. With
    # the two-argument form a name such as "cmd |" would run a command and
    # a leading '>' would truncate a file.
    open(FILE, '<', $_[0]) or croak "Couldn't open $_[0]:\n$!";
    binmode(FILE);

    my $ret = $self->parse(*FILE);
    close(FILE);

    return $ret;
}
|
---|
497 |
|
---|
498 | ################################################################
|
---|
package    #hide from PAUSE
  XML::Parser::ContentModel;

# A content model object is a hash with a Type (one of the constants
# below), and depending on the type a Tag, a Quant string and a Children
# array. Objects stringify via asString and compare with 'eq' on that
# string form.
use overload '""' => \&asString, 'eq' => \&thiseq;

# Values stored in the Type field.
sub EMPTY ()  { 1 }
sub ANY ()    { 2 }
sub MIXED ()  { 3 }
sub NAME ()   { 4 }
sub CHOICE () { 5 }
sub SEQ ()    { 6 }

# Type predicates.
sub isempty {
    return $_[0]->{Type} == EMPTY;
}

sub isany {
    return $_[0]->{Type} == ANY;
}

sub ismixed {
    return $_[0]->{Type} == MIXED;
}

sub isname {
    return $_[0]->{Type} == NAME;
}

# Returns the Tag field.
sub name {
    return $_[0]->{Tag};
}

sub ischoice {
    return $_[0]->{Type} == CHOICE;
}

sub isseq {
    return $_[0]->{Type} == SEQ;
}

# Returns the Quant field.
sub quant {
    return $_[0]->{Quant};
}

# Returns the list of child models. When the model has no Children field
# this returns undef in scalar context and an empty list in list context,
# so that "my @kids = $model->children" does not receive a stray undef.
sub children {
    my $children = $_[0]->{Children};
    return @$children if defined $children;
    return;
}

# Renders the model as a string: the tag for NAME, the literal "EMPTY" or
# "ANY", "(#PCDATA|...)" for MIXED, and a parenthesised list joined by
# '|' (CHOICE) or ',' (otherwise) for the rest. A true Quant is appended,
# except for EMPTY and ANY, which return immediately.
sub asString {
    my ($self) = @_;
    my $ret;

    if ($self->{Type} == NAME) {
        $ret = $self->{Tag};
    }
    elsif ($self->{Type} == EMPTY) {
        return "EMPTY";
    }
    elsif ($self->{Type} == ANY) {
        return "ANY";
    }
    elsif ($self->{Type} == MIXED) {
        $ret = '(#PCDATA';
        foreach (@{$self->{Children}}) {
            $ret .= '|' . $_;
        }
        $ret .= ')';
    }
    else {
        my $sep = $self->{Type} == CHOICE ? '|' : ',';
        $ret = '(' . join($sep, map { $_->asString } @{$self->{Children}}) . ')';
    }

    $ret .= $self->{Quant} if $self->{Quant};
    return $ret;
}

# Implements the overloaded 'eq': compares the string form with the other
# operand.
sub thiseq {
    my $self = shift;

    return $self->asString eq $_[0];
}
|
---|
585 |
|
---|
586 | ################################################################
|
---|
package    #hide from PAUSE
  XML::Parser::ExpatNB;

use vars qw(@ISA);
use Carp;

@ISA = qw(XML::Parser::Expat);

# parse, parsestring and parsefile are inherited entry points that this
# class refuses; input is supplied through parse_more and parse_done.

sub parse {
    my $class = ref(shift);
    croak "parse method not supported in $class";
}

sub parsestring {
    my $class = ref(shift);
    croak "parsestring method not supported in $class";
}

sub parsefile {
    my $class = ref(shift);
    croak "parsefile method not supported in $class";
}

# Hands $data to ParsePartial, marking the parse as running (_State_ 1).
# Croaks with ErrorMessage when ParsePartial returns false.
sub parse_more {
    my ($self, $data) = @_;

    $self->{_State_} = 1;
    my $ret = XML::Parser::Expat::ParsePartial($self->{Parser}, $data);

    $ret or croak $self->{ErrorMessage};
}

# Finishes the parse via ParseDone. On failure the parser is released and
# ErrorMessage is croaked. On success _State_ becomes 2, the FinalHandler
# (if defined) is called in the caller's list or scalar context, the
# parser is released, and the handler's result is returned. Without a
# FinalHandler, scalar context gets ParseDone's result and list context
# gets an empty list.
sub parse_done {
    my ($self) = @_;

    my $ok = XML::Parser::Expat::ParseDone($self->{Parser});
    if (!$ok) {
        my $msg = $self->{ErrorMessage};
        $self->release;
        croak $msg;
    }

    $self->{_State_} = 2;

    my $scalar_result = $ok;
    my @list_result   = ();

    my $final = $self->{FinalHandler};
    if (defined $final) {
        if (wantarray) {
            @list_result = $final->($self);
        }
        else {
            $scalar_result = $final->($self);
        }
    }

    $self->release;

    return unless defined wantarray;
    return wantarray ? @list_result : $scalar_result;
}
|
---|
651 |
|
---|
652 | ################################################################
|
---|
653 |
|
---|
package    #hide from PAUSE
  XML::Parser::Encinfo;

# Destructor: passes the object to XML::Parser::Expat::FreeEncoding.
sub DESTROY {
    my ($encinfo) = @_;
    return XML::Parser::Expat::FreeEncoding($encinfo);
}
|
---|
661 |
|
---|
662 | 1;
|
---|
663 |
|
---|
664 | __END__
|
---|
665 |
|
---|
666 | =head1 NAME
|
---|
667 |
|
---|
668 | XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser
|
---|
669 |
|
---|
670 | =head1 SYNOPSIS
|
---|
671 |
|
---|
672 | use XML::Parser::Expat;
|
---|
673 |
|
---|
674 | $parser = XML::Parser::Expat->new;
|
---|
675 | $parser->setHandlers('Start' => \&sh,
|
---|
676 | 'End' => \&eh,
|
---|
677 | 'Char' => \&ch);
|
---|
678 | open(FOO, '<', 'info.xml') or die "Couldn't open";
|
---|
679 | $parser->parse(*FOO);
|
---|
680 | close(FOO);
|
---|
681 | # $parser->parse('<foo id="me"> here <em>we</em> go </foo>');
|
---|
682 |
|
---|
683 | sub sh
|
---|
684 | {
|
---|
685 | my ($p, $el, %atts) = @_;
|
---|
686 | $p->setHandlers('Char' => \&spec)
|
---|
687 | if ($el eq 'special');
|
---|
688 | ...
|
---|
689 | }
|
---|
690 |
|
---|
691 | sub eh
|
---|
692 | {
|
---|
693 | my ($p, $el) = @_;
|
---|
694 | $p->setHandlers('Char' => \&ch) # Special elements won't contain
|
---|
695 | if ($el eq 'special'); # other special elements
|
---|
696 | ...
|
---|
697 | }
|
---|
698 |
|
---|
699 | =head1 DESCRIPTION
|
---|
700 |
|
---|
701 | This module provides an interface to James Clark's XML parser, expat. As in
|
---|
702 | expat, a single instance of the parser can only parse one document. Calls
|
---|
703 | to parsestring after the first for a given instance will die.
|
---|
704 |
|
---|
705 | Expat (and XML::Parser::Expat) are event based. As the parser recognizes
|
---|
706 | parts of the document (say the start or end of an XML element), then any
|
---|
707 | handlers registered for that type of an event are called with suitable
|
---|
708 | parameters.
|
---|
709 |
|
---|
710 | =head1 METHODS
|
---|
711 |
|
---|
712 | =over 4
|
---|
713 |
|
---|
714 | =item new
|
---|
715 |
|
---|
716 | This is a class method, the constructor for XML::Parser::Expat. Options are
|
---|
717 | passed as keyword value pairs. The recognized options are:
|
---|
718 |
|
---|
719 | =over 4
|
---|
720 |
|
---|
721 | =item * ProtocolEncoding
|
---|
722 |
|
---|
723 | The protocol encoding name. The default is none. The expat built-in
|
---|
724 | encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>.
|
---|
725 | Other encodings may be used if they have encoding maps in one of the
|
---|
726 | directories in the @Encoding_Path list. Setting the protocol encoding
|
---|
727 | overrides any encoding in the XML declaration.
|
---|
728 |
|
---|
729 | =item * Namespaces
|
---|
730 |
|
---|
731 | When this option is given with a true value, then the parser does namespace
|
---|
732 | processing. By default, namespace processing is turned off. When it is
|
---|
733 | turned on, the parser consumes I<xmlns> attributes and strips off prefixes
|
---|
734 | from element and attribute names where those prefixes have a defined
|
---|
735 | namespace. A name's namespace can be found using the L<"namespace"> method
|
---|
736 | and two names can be checked for absolute equality with the L<"eq_name">
|
---|
737 | method.
|
---|
738 |
|
---|
739 | =item * NoExpand
|
---|
740 |
|
---|
741 | Normally, the parser will try to expand references to entities defined in
|
---|
742 | the internal subset. If this option is set to a true value, and a default
|
---|
743 | handler is also set, then the default handler will be called when an
|
---|
744 | entity reference is seen in text. This has no effect if a default handler
|
---|
745 | has not been registered, and it has no effect on the expansion of entity
|
---|
746 | references inside attribute values.
|
---|
747 |
|
---|
748 | =item * Stream_Delimiter
|
---|
749 |
|
---|
750 | This option takes a string value. When this string is found alone on a line
|
---|
751 | while parsing from a stream, then the parse is ended as if it saw an end of
|
---|
752 | file. The intended use is with a stream of xml documents in a MIME multipart
|
---|
753 | format. The string should not contain a trailing newline.
|
---|
754 |
|
---|
755 | =item * ErrorContext
|
---|
756 |
|
---|
757 | When this option is defined, errors are reported in context. The value
|
---|
758 | of ErrorContext should be the number of lines to show on either side of
|
---|
759 | the line in which the error occurred.
|
---|
760 |
|
---|
761 | =item * ParseParamEnt
|
---|
762 |
|
---|
763 | Unless standalone is set to "yes" in the XML declaration, setting this to
|
---|
764 | a true value allows the external DTD to be read, and parameter entities
|
---|
765 | to be parsed and expanded.
|
---|
766 |
|
---|
767 | =item * Base
|
---|
768 |
|
---|
769 | The base to use for relative pathnames or URLs. This can also be done by
|
---|
770 | using the base method.
|
---|
771 |
|
---|
772 | =back
|
---|
773 |
|
---|
774 | =item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
|
---|
775 |
|
---|
776 | This method registers handlers for the various events. If no handlers are
|
---|
777 | registered, then a call to parsestring or parsefile will only determine if
|
---|
778 | the corresponding XML document is well formed (by returning without error.)
|
---|
779 | This may be called from within a handler, after the parse has started.
|
---|
780 |
|
---|
781 | Setting a handler to something that evaluates to false unsets that
|
---|
782 | handler.
|
---|
783 |
|
---|
784 | This method returns a list of type, handler pairs corresponding to the
|
---|
785 | input. The handlers returned are the ones that were in effect before the
|
---|
786 | call to setHandlers.
|
---|
787 |
|
---|
788 | The recognized events and the parameters passed to the corresponding
|
---|
789 | handlers are:
|
---|
790 |
|
---|
791 | =over 4
|
---|
792 |
|
---|
793 | =item * Start (Parser, Element [, Attr, Val [,...]])
|
---|
794 |
|
---|
795 | This event is generated when an XML start tag is recognized. Parser is
|
---|
796 | an XML::Parser::Expat instance. Element is the name of the XML element that
|
---|
797 | is opened with the start tag. The Attr & Val pairs are generated for each
|
---|
798 | attribute in the start tag.
|
---|
799 |
|
---|
800 | =item * End (Parser, Element)
|
---|
801 |
|
---|
802 | This event is generated when an XML end tag is recognized. Note that
|
---|
803 | an XML empty tag (<foo/>) generates both a start and an end event.
|
---|
804 |
|
---|
805 | There is always a lower level start and end handler installed that wrap
|
---|
806 | the corresponding callbacks. This is to handle the context mechanism.
|
---|
807 | A consequence of this is that the default handler (see below) will not
|
---|
808 | see a start tag or end tag unless the default_current method is called.
|
---|
809 |
|
---|
810 | =item * Char (Parser, String)
|
---|
811 |
|
---|
812 | This event is generated when non-markup is recognized. The non-markup
|
---|
813 | sequence of characters is in String. A single non-markup sequence of
|
---|
814 | characters may generate multiple calls to this handler. Whatever the
|
---|
815 | encoding of the string in the original document, this is given to the
|
---|
816 | handler in UTF-8.
|
---|
817 |
|
---|
818 | =item * Proc (Parser, Target, Data)
|
---|
819 |
|
---|
820 | This event is generated when a processing instruction is recognized.
|
---|
821 |
|
---|
822 | =item * Comment (Parser, String)
|
---|
823 |
|
---|
824 | This event is generated when a comment is recognized.
|
---|
825 |
|
---|
826 | =item * CdataStart (Parser)
|
---|
827 |
|
---|
828 | This is called at the start of a CDATA section.
|
---|
829 |
|
---|
830 | =item * CdataEnd (Parser)
|
---|
831 |
|
---|
832 | This is called at the end of a CDATA section.
|
---|
833 |
|
---|
834 | =item * Default (Parser, String)
|
---|
835 |
|
---|
836 | This is called for any characters that don't have a registered handler.
|
---|
837 | This includes both characters that are part of markup for which no
|
---|
838 | events are generated (markup declarations) and characters that
|
---|
839 | could generate events, but for which no handler has been registered.
|
---|
840 |
|
---|
841 | Whatever the encoding in the original document, the string is returned to
|
---|
842 | the handler in UTF-8.
|
---|
843 |
|
---|
844 | =item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation)
|
---|
845 |
|
---|
846 | This is called for a declaration of an unparsed entity. Entity is the name
|
---|
847 | of the entity. Base is the base to be used for resolving a relative URI.
|
---|
848 | Sysid is the system id. Pubid is the public id. Notation is the notation
|
---|
849 | name. Base and Pubid may be undefined.
|
---|
850 |
|
---|
851 | =item * Notation (Parser, Notation, Base, Sysid, Pubid)
|
---|
852 |
|
---|
853 | This is called for a notation declaration. Notation is the notation name.
|
---|
854 | Base is the base to be used for resolving a relative URI. Sysid is the system
|
---|
855 | id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.
|
---|
856 |
|
---|
857 | =item * ExternEnt (Parser, Base, Sysid, Pubid)
|
---|
858 |
|
---|
859 | This is called when an external entity is referenced. Base is the base to be
|
---|
860 | used for resolving a relative URI. Sysid is the system id. Pubid is the public
|
---|
861 | id. Base and Pubid may be undefined.
|
---|
862 |
|
---|
863 | This handler should either return a string, which represents the contents of
|
---|
864 | the external entity, or return an open filehandle that can be read to obtain
|
---|
865 | the contents of the external entity, or return undef, which indicates the
|
---|
866 | external entity couldn't be found and will generate a parse error.
|
---|
867 |
|
---|
868 | If an open filehandle is returned, it must be returned as either a glob
|
---|
869 | (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).
|
---|
870 |
|
---|
871 | =item * ExternEntFin (Parser)
|
---|
872 |
|
---|
873 | This is called after an external entity has been parsed. It allows
|
---|
874 | applications to perform cleanup on actions performed in the above
|
---|
875 | ExternEnt handler.
|
---|
876 |
|
---|
877 | =item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam)
|
---|
878 |
|
---|
879 | This is called when an entity is declared. For internal entities, the Val
|
---|
880 | parameter will contain the value and the remaining three parameters will
|
---|
881 | be undefined. For external entities, the Val parameter
|
---|
882 | will be undefined, the Sysid parameter will have the system id, the Pubid
|
---|
883 | parameter will have the public id if it was provided (it will be undefined
|
---|
884 | otherwise), the Ndata parameter will contain the notation for unparsed
|
---|
885 | entities. If this is a parameter entity declaration, then the IsParam
|
---|
886 | parameter is true.
|
---|
887 |
|
---|
888 | Note that this handler and the Unparsed handler above overlap. If both are
|
---|
889 | set, then this handler will not be called for unparsed entities.
|
---|
890 |
|
---|
891 | =item * Element (Parser, Name, Model)
|
---|
892 |
|
---|
893 | The element handler is called when an element declaration is found. Name is
|
---|
894 | the element name, and Model is the content model as an
|
---|
895 | XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods">
|
---|
896 | for methods available for this class.
|
---|
897 |
|
---|
898 | =item * Attlist (Parser, Elname, Attname, Type, Default, Fixed)
|
---|
899 |
|
---|
900 | This handler is called for each attribute in an ATTLIST declaration.
|
---|
901 | So an ATTLIST declaration that has multiple attributes
|
---|
902 | will generate multiple calls to this handler. The Elname parameter is the
|
---|
903 | name of the element with which the attribute is being associated. The Attname
|
---|
904 | parameter is the name of the attribute. Type is the attribute type, given as
|
---|
905 | a string. Default is the default value, which will either be "#REQUIRED",
|
---|
906 | "#IMPLIED" or a quoted string (i.e. the returned string will begin and end
|
---|
907 | with a quote character). If Fixed is true, then this is a fixed attribute.
|
---|
908 |
|
---|
909 | =item * Doctype (Parser, Name, Sysid, Pubid, Internal)
|
---|
910 |
|
---|
911 | This handler is called for DOCTYPE declarations. Name is the document type
|
---|
912 | name. Sysid is the system id of the document type, if it was provided,
|
---|
913 | otherwise it's undefined. Pubid is the public id of the document type,
|
---|
914 | which will be undefined if no public id was given. Internal will be
|
---|
915 | true or false, indicating whether or not the doctype declaration contains
|
---|
916 | an internal subset.
|
---|
917 |
|
---|
918 | =item * DoctypeFin (Parser)
|
---|
919 |
|
---|
920 | This handler is called after parsing of the DOCTYPE declaration has finished,
|
---|
921 | including any internal or external DTD declarations.
|
---|
922 |
|
---|
923 | =item * XMLDecl (Parser, Version, Encoding, Standalone)
|
---|
924 |
|
---|
925 | This handler is called for XML declarations. Version is a string containing
|
---|
926 | the version. Encoding is either undefined or contains an encoding string.
|
---|
927 | Standalone is either undefined, or true or false. Undefined indicates
|
---|
928 | that no standalone parameter was given in the XML declaration. True or
|
---|
929 | false indicates "yes" or "no" respectively.
|
---|
930 |
|
---|
931 | =back
|
---|
932 |
|
---|
933 | =item namespace(name)
|
---|
934 |
|
---|
935 | Return the URI of the namespace that the name belongs to. If the name doesn't
|
---|
936 | belong to any namespace, an undef is returned. This is only valid on names
|
---|
937 | received through the Start or End handlers from a single document, or through
|
---|
938 | a call to the generate_ns_name method. In other words, don't use names
|
---|
939 | generated from one instance of XML::Parser::Expat with other instances.
|
---|
940 |
|
---|
941 | =item eq_name(name1, name2)
|
---|
942 |
|
---|
943 | Return true if name1 and name2 are identical (i.e. same name and from
|
---|
944 | the same namespace.) This is only meaningful if both names were obtained
|
---|
945 | through the Start or End handlers from a single document, or through
|
---|
946 | a call to the generate_ns_name method.
|
---|
947 |
|
---|
948 | =item generate_ns_name(name, namespace)
|
---|
949 |
|
---|
950 | Return a name, associated with a given namespace, good for using with the
|
---|
951 | above 2 methods. The namespace argument should be the namespace URI, not
|
---|
952 | a prefix.
|
---|
953 |
|
---|
954 | =item new_ns_prefixes
|
---|
955 |
|
---|
956 | When called from a start tag handler, returns namespace prefixes declared
|
---|
957 | with this start tag. If called elsewhere (or if there were no namespace
|
---|
958 | prefixes declared), it returns an empty list. Setting of the default
|
---|
959 | namespace is indicated with '#default' as a prefix.
|
---|
960 |
|
---|
961 | =item expand_ns_prefix(prefix)
|
---|
962 |
|
---|
963 | Return the uri to which the given prefix is currently bound. Returns
|
---|
964 | undef if the prefix isn't currently bound. Use '#default' to find the
|
---|
965 | current binding of the default namespace (if any).
|
---|
966 |
|
---|
967 | =item current_ns_prefixes
|
---|
968 |
|
---|
969 | Return a list of currently bound namespace prefixes. The order of the
|
---|
970 | prefixes in the list has no meaning. If the default namespace is
|
---|
971 | currently bound, '#default' appears in the list.
|
---|
972 |
|
---|
973 | =item recognized_string
|
---|
974 |
|
---|
975 | Returns the string from the document that was recognized in order to call
|
---|
976 | the current handler. For instance, when called from a start handler, it
|
---|
977 | will give us the start-tag string. The string is encoded in UTF-8.
|
---|
978 | This method doesn't return a meaningful string inside declaration handlers.
|
---|
979 |
|
---|
980 | =item original_string
|
---|
981 |
|
---|
982 | Returns the verbatim string from the document that was recognized in
|
---|
983 | order to call the current handler. The string is in the original document
|
---|
984 | encoding. This method doesn't return a meaningful string inside declaration
|
---|
985 | handlers.
|
---|
986 |
|
---|
987 | =item default_current
|
---|
988 |
|
---|
989 | When called from a handler, causes the sequence of characters that generated
|
---|
990 | the corresponding event to be sent to the default handler (if one is
|
---|
991 | registered). Use of this method is deprecated in favor of the recognized_string
|
---|
992 | method, which you can use without installing a default handler. This
|
---|
993 | method doesn't deliver a meaningful string to the default handler when
|
---|
994 | called from inside declaration handlers.
|
---|
995 |
|
---|
996 | =item xpcroak(message)
|
---|
997 |
|
---|
998 | Concatenate onto the given message the current line number within the
|
---|
999 | XML document plus the message implied by ErrorContext. Then croak with
|
---|
1000 | the formed message.
|
---|
1001 |
|
---|
1002 | =item xpcarp(message)
|
---|
1003 |
|
---|
1004 | Concatenate onto the given message the current line number within the
|
---|
1005 | XML document plus the message implied by ErrorContext. Then carp with
|
---|
1006 | the formed message.
|
---|
1007 |
|
---|
1008 | =item current_line
|
---|
1009 |
|
---|
1010 | Returns the line number of the current position of the parse.
|
---|
1011 |
|
---|
1012 | =item current_column
|
---|
1013 |
|
---|
1014 | Returns the column number of the current position of the parse.
|
---|
1015 |
|
---|
1016 | =item current_byte
|
---|
1017 |
|
---|
1018 | Returns the current position of the parse.
|
---|
1019 |
|
---|
1020 | =item base([NEWBASE]);
|
---|
1021 |
|
---|
1022 | Returns the current value of the base for resolving relative URIs. If
|
---|
1023 | NEWBASE is supplied, changes the base to that value.
|
---|
1024 |
|
---|
1025 | =item context
|
---|
1026 |
|
---|
1027 | Returns a list of element names that represent open elements, with the
|
---|
1028 | last one being the innermost. Inside start and end tag handlers, this
|
---|
1029 | will be the tag of the parent element.
|
---|
1030 |
|
---|
1031 | =item current_element
|
---|
1032 |
|
---|
1033 | Returns the name of the innermost currently opened element. Inside
|
---|
1034 | start or end handlers, returns the parent of the element associated
|
---|
1035 | with those tags.
|
---|
1036 |
|
---|
1037 | =item in_element(NAME)
|
---|
1038 |
|
---|
1039 | Returns true if NAME is equal to the name of the innermost currently opened
|
---|
1040 | element. If namespace processing is being used and you want to check
|
---|
1041 | against a name that may be in a namespace, then use the generate_ns_name
|
---|
1042 | method to create the NAME argument.
|
---|
1043 |
|
---|
1044 | =item within_element(NAME)
|
---|
1045 |
|
---|
1046 | Returns the number of times the given name appears in the context list.
|
---|
1047 | If namespace processing is being used and you want to check
|
---|
1048 | against a name that may be in a namespace, then use the generate_ns_name
|
---|
1049 | method to create the NAME argument.
|
---|
1050 |
|
---|
1051 | =item depth
|
---|
1052 |
|
---|
1053 | Returns the size of the context list.
|
---|
1054 |
|
---|
1055 | =item element_index
|
---|
1056 |
|
---|
1057 | Returns an integer that is the depth-first visit order of the current
|
---|
1058 | element. This will be zero outside of the root element. For example,
|
---|
1059 | this will return 1 when called from the start handler for the root element
|
---|
1060 | start tag.
|
---|
1061 |
|
---|
1062 | =item skip_until(INDEX)
|
---|
1063 |
|
---|
1064 | INDEX is an integer that represents an element index. When this method
|
---|
1065 | is called, all handlers are suspended until the start tag for an element
|
---|
1066 | that has an index number equal to INDEX is seen. If a start handler has
|
---|
1067 | been set, then this is the first tag that the start handler will see
|
---|
1068 | after skip_until has been called.
|
---|
1069 |
|
---|
1070 |
|
---|
1071 | =item position_in_context(LINES)
|
---|
1072 |
|
---|
1073 | Returns a string that shows the current parse position. LINES should be
|
---|
1074 | an integer >= 0 that represents the number of lines on either side of the
|
---|
1075 | current parse line to place into the returned string.
|
---|
1076 |
|
---|
1077 | =item xml_escape(TEXT [, CHAR [, CHAR ...]])
|
---|
1078 |
|
---|
1079 | Returns TEXT with markup characters turned into character entities. Any
|
---|
1080 | additional characters provided as arguments are also turned into character
|
---|
1081 | references where found in TEXT.
|
---|
1082 |
|
---|
1083 | =item parse (SOURCE)
|
---|
1084 |
|
---|
1085 | The SOURCE parameter should either be a string containing the whole XML
|
---|
1086 | document, or it should be an open IO::Handle. Only a single document
|
---|
1087 | may be parsed for a given instance of XML::Parser::Expat, so this will croak
|
---|
1088 | if it's been called previously for this instance.
|
---|
1089 |
|
---|
1090 | =item parsestring(XML_DOC_STRING)
|
---|
1091 |
|
---|
1092 | Parses the given string as an XML document. Only a single document may be
|
---|
1093 | parsed for a given instance of XML::Parser::Expat, so this will die if either
|
---|
1094 | parsestring or parsefile has been called for this instance previously.
|
---|
1095 |
|
---|
1096 | This method is deprecated in favor of the parse method.
|
---|
1097 |
|
---|
1098 | =item parsefile(FILENAME)
|
---|
1099 |
|
---|
1100 | Parses the XML document in the given file. Will die if parsestring or
|
---|
1101 | parsefile has been called previously for this instance.
|
---|
1102 |
|
---|
1103 | =item is_defaulted(ATTNAME)
|
---|
1104 |
|
---|
1105 | NO LONGER WORKS. To find out if an attribute is defaulted please use
|
---|
1106 | the specified_attr method.
|
---|
1107 |
|
---|
1108 | =item specified_attr
|
---|
1109 |
|
---|
1110 | When the start handler receives lists of attributes and values, the
|
---|
1111 | non-defaulted (i.e. explicitly specified) attributes occur in the list
|
---|
1112 | first. This method returns the number of specified items in the list.
|
---|
1113 | So if this number is equal to the length of the list, there were no
|
---|
1114 | defaulted values. Otherwise the number points to the index of the
|
---|
1115 | first defaulted attribute name.
|
---|
1116 |
|
---|
1117 | =item finish
|
---|
1118 |
|
---|
1119 | Unsets all handlers (including internal ones that set context), but expat
|
---|
1120 | continues parsing to the end of the document or until it finds an error.
|
---|
1121 | It should finish up a lot faster than with the handlers set.
|
---|
1122 |
|
---|
1123 | =item release
|
---|
1124 |
|
---|
1125 | There are data structures used by XML::Parser::Expat that have circular
|
---|
1126 | references. This means that these structures will never be garbage
|
---|
1127 | collected unless these references are explicitly broken. Calling this
|
---|
1128 | method breaks those references (and makes the instance unusable.)
|
---|
1129 |
|
---|
1130 | Normally, higher level calls handle this for you, but if you are using
|
---|
1131 | XML::Parser::Expat directly, then it's your responsibility to call it.
|
---|
1132 |
|
---|
1133 | =back
|
---|
1134 |
|
---|
1135 | =head2 XML::Parser::ContentModel Methods
|
---|
1136 |
|
---|
1137 | The element declaration handlers are passed objects of this class as the
|
---|
1138 | content model of the element declaration. They also represent content
|
---|
1139 | particles, components of a content model.
|
---|
1140 |
|
---|
1141 | When referred to as a string, these objects are automagically converted to a
|
---|
1142 | string representation of the model (or content particle).
|
---|
1143 |
|
---|
1144 | =over 4
|
---|
1145 |
|
---|
1146 | =item isempty
|
---|
1147 |
|
---|
1148 | This method returns true if the object is "EMPTY", false otherwise.
|
---|
1149 |
|
---|
1150 | =item isany
|
---|
1151 |
|
---|
1152 | This method returns true if the object is "ANY", false otherwise.
|
---|
1153 |
|
---|
1154 | =item ismixed
|
---|
1155 |
|
---|
1156 | This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*",
|
---|
1157 | false otherwise.
|
---|
1158 |
|
---|
1159 | =item isname
|
---|
1160 |
|
---|
1161 | This method returns true if the object is an element name.
|
---|
1162 |
|
---|
1163 | =item ischoice
|
---|
1164 |
|
---|
1165 | This method returns true if the object is a choice of content particles.
|
---|
1166 |
|
---|
1167 |
|
---|
1168 | =item isseq
|
---|
1169 |
|
---|
1170 | This method returns true if the object is a sequence of content particles.
|
---|
1171 |
|
---|
1172 | =item quant
|
---|
1173 |
|
---|
1174 | This method returns undef or a string representing the quantifier
|
---|
1175 | ('?', '*', '+') associated with the model or particle.
|
---|
1176 |
|
---|
1177 | =item children
|
---|
1178 |
|
---|
1179 | This method returns undef or (for mixed, choice, and sequence types)
|
---|
1180 | an array of component content particles. There will always be at least
|
---|
1181 | one component for choices and sequences, but for a mixed content model
|
---|
1182 | of pure PCDATA, "(#PCDATA)", then an undef is returned.
|
---|
1183 |
|
---|
1184 | =back
|
---|
1185 |
|
---|
1186 | =head2 XML::Parser::ExpatNB Methods
|
---|
1187 |
|
---|
1188 | The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used
|
---|
1189 | for non-blocking access to the expat library. It does not support the parse,
|
---|
1190 | parsestring, or parsefile methods, but it does have these additional methods:
|
---|
1191 |
|
---|
1192 | =over 4
|
---|
1193 |
|
---|
1194 | =item parse_more(DATA)
|
---|
1195 |
|
---|
1196 | Feed expat more text to munch on.
|
---|
1197 |
|
---|
1198 | =item parse_done
|
---|
1199 |
|
---|
1200 | Tell expat that it's gotten the whole document.
|
---|
1201 |
|
---|
1202 | =back
|
---|
1203 |
|
---|
1204 | =head1 FUNCTIONS
|
---|
1205 |
|
---|
1206 | =over 4
|
---|
1207 |
|
---|
1208 | =item XML::Parser::Expat::load_encoding(ENCODING)
|
---|
1209 |
|
---|
1210 | Load an external encoding. ENCODING is either the name of an encoding or
|
---|
1211 | the name of a file. The basename is converted to lowercase and a '.enc'
|
---|
1212 | extension is appended unless there's one already there. Then, unless
|
---|
1213 | it's an absolute pathname (i.e. begins with '/'), the first file by that
|
---|
1214 | name discovered in the @Encoding_Path path list is used.
|
---|
1215 |
|
---|
1216 | The encoding in the file is loaded and kept in the %Encoding_Table
|
---|
1217 | table. Earlier encodings of the same name are replaced.
|
---|
1218 |
|
---|
1219 | This function is automatically called by expat when it encounters an encoding
|
---|
1220 | it doesn't know about. Expat shouldn't call this twice for the same
|
---|
1221 | encoding name. The only reason users should use this function is to
|
---|
1222 | explicitly load an encoding not contained in the @Encoding_Path list.
|
---|
1223 |
|
---|
1224 | =back
|
---|
1225 |
|
---|
1226 | =head1 AUTHORS
|
---|
1227 |
|
---|
1228 | Larry Wall <F<[email protected]>> wrote version 1.0.
|
---|
1229 |
|
---|
1230 | Clark Cooper <F<[email protected]>> picked up support, changed the API
|
---|
1231 | for this version (2.x), provided documentation, and added some standard
|
---|
1232 | package features.
|
---|
1233 |
|
---|
1234 | =cut
|
---|