source: main/trunk/release-kits/shared/linux/XML-Parser/64-bit/perl-5.24/XML/Parser.pm@ 31805

Last change on this file since 31805 was 31805, checked in by ak19, 7 years ago

Committing the perl-5.24 folder that Dr Bainbridge successfully built on Ubuntu 16.04 against a locally compiled (non-system) perl 5.24. It is a byproduct of compiling up a gs3-svn on Ubuntu 16.04 with that perl 5.24 in the PATH. This perl-5.24 folder was copied from the gs3-svn's gs2build/perllib/cpan folder. It's being committed into the release-kits area, as other versions before have been, because it's needed for release-kits to generate binaries that will work with perl 5.24. However, the version we generated only has Expat.so, not Expat.bs. Not sure if the latter is still necessary, and if so, how it should be generated. See http://www.cryst.bbk.ac.uk/CCSG/programming/perl/PerlDoc/lib/ExtUtils/Mkbootstrap.html for ideas, if the bs extension stands for bootstrap as it seems to.

File size: 26.5 KB
Line 
1# XML::Parser
2#
3# Copyright (c) 1998-2000 Larry Wall and Clark Cooper
4# All rights reserved.
5#
6# This program is free software; you can redistribute it and/or
7# modify it under the same terms as Perl itself.
8
9package XML::Parser;
10
11use strict;
12
13use vars qw($VERSION $LWP_load_failed);
14
15use Carp;
16
# Load the Expat backend at compile time and refuse to continue when the
# XML::Parser::Expat that was found reports a different version than this
# module does.
BEGIN {
    require XML::Parser::Expat;
    $VERSION = '2.41';
    if ($VERSION ne $XML::Parser::Expat::VERSION) {
        die "Parser.pm and Expat.pm versions don't match";
    }
}

# Becomes true once loading the LWP based external entity handler has
# failed; the file based handler is used from then on.
$LWP_load_failed = 0;
25
# Constructor.  Takes the class name followed by option => value pairs and
# returns the option hash blessed into that class.
#
# Besides storing the options it:
#   * records in Non_Expat_Options the keys that must not be forwarded to
#     XML::Parser::Expat,
#   * stores the set of known handler types (the Expat handler setters plus
#     Init and Final) under _HNDL_TYPES,
#   * fills in handlers from the Style package for every type that was not
#     given explicitly in Handlers,
#   * installs a default external entity handler when neither ExternEnt nor
#     ExternEntFin was supplied,
#   * defaults Pkg to the calling package.
#
# Note that a caller-supplied Handlers or Non_Expat_Options hash is used
# directly (not copied), so the entries added here show up in it.
sub new {
    my ($class, %args) = @_;

    # Options consumed by XML::Parser itself rather than by Expat.
    my $nonexopt = ($args{Non_Expat_Options} ||= {});
    $nonexopt->{$_} = 1
        for qw(Style Non_Expat_Options Handlers _HNDL_TYPES NoLWP);

    my %hndl_types = (
        %XML::Parser::Expat::Handler_Setters,
        Init  => 1,
        Final => 1,
    );
    $args{_HNDL_TYPES} = \%hndl_types;

    my $handlers = ($args{Handlers} ||= {});

    my $style = $args{Style};
    if (defined $style) {
        my $stylepkg = $style;

        # A bare style name (no '::') refers to a built in style.
        unless ($stylepkg =~ /::/) {
            $stylepkg = ucfirst $style;

            my $fullpkg = 'XML::Parser::Style::' . $stylepkg;
            (my $stylefile = "$fullpkg.pm") =~ s/::/\//g;

            eval { require $stylefile; };

            # When the style module cannot be loaded, fall back to the
            # old behaviour of looking in XML::Parser::<Style>.
            $stylepkg = $@ ? 'XML::Parser::' . $stylepkg : $fullpkg;
        }

        for my $htype (keys %{ $args{_HNDL_TYPES} }) {

            # Handlers explicitly given override
            # handlers from the Style package
            next if defined $handlers->{$htype};

            # A handler in the style package must either have
            # exactly the right case as the type name or a
            # completely lower case version of it.
            for my $hname ("${stylepkg}::$htype", "${stylepkg}::\L$htype") {
                if (defined(&$hname)) {
                    $handlers->{$htype} = \&$hname;
                    last;
                }
            }
        }
    }

    if (   !defined($handlers->{ExternEnt})
        && !defined($handlers->{ExternEntFin}))
    {
        if ($args{NoLWP} or $LWP_load_failed) {
            $handlers->{ExternEnt}    = \&file_ext_ent_handler;
            $handlers->{ExternEntFin} = \&file_ext_ent_cleanup;
        }
        else {
            # This only bootstraps the real LWP external entity handler.
            # No cleanup function is available until LWPExternEnt.pl has
            # been loaded.
            $handlers->{ExternEnt} = \&initial_ext_ent_handler;
        }
    }

    $args{Pkg} ||= caller;
    return bless \%args, $class;
}    # End of new
109
# Register handlers given as TYPE => HANDLER pairs.
#
# Croaks when the argument list is uneven or when a TYPE is not one of the
# keys of $self->{_HNDL_TYPES}.  Returns a list of TYPE => HANDLER pairs
# holding the handlers that were in effect before the call, in the order
# the types were given.
sub setHandlers {
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
        if @handler_pairs % 2;

    my @previous;
    while (my ($type, $handler) = splice(@handler_pairs, 0, 2)) {
        if (!defined $self->{_HNDL_TYPES}{$type}) {
            my @types = sort keys %{ $self->{_HNDL_TYPES} };

            croak("Unknown Parser handler type: $type\n Valid types: @types");
        }

        # Remember the handler being replaced before overwriting it.
        push @previous, $type, $self->{Handlers}{$type};
        $self->{Handlers}{$type} = $handler;
    }

    return @previous;
}
131
# Create and return an XML::Parser::ExpatNB object for incremental parsing.
#
# Every option stored in the parser object, except the keys listed in
# Non_Expat_Options, is passed to the ExpatNB constructor, followed by any
# OPT => VALUE pairs given to this method.  All handlers other than Init and
# Final are registered on the new object; the Init handler (if any) is called
# with it before returning, and the Final handler (if any) is stored under
# FinalHandler on the returned object.
sub parse_start {
    my $self = shift;

    # Iterate with keys() rather than each(): keys() resets the hash
    # iterator, so a half-finished each() loop elsewhere on this object
    # cannot make options silently go missing, and the object is never
    # modified while it is being iterated.
    my $non_expat = $self->{Non_Expat_Options} || {};
    my @expat_options;
    for my $key (keys %{$self}) {
        next if exists $non_expat->{$key};
        push @expat_options, $key, $self->{$key};
    }

    # Work on a copy so that removing Init/Final does not touch the
    # handlers stored in the parser object.
    my %handlers = %{ $self->{Handlers} };
    my $init     = delete $handlers{Init};
    my $final    = delete $handlers{Final};

    my $expatnb = XML::Parser::ExpatNB->new(@expat_options, @_);
    $expatnb->setHandlers(%handlers);

    $init->($expatnb) if defined $init;

    $expatnb->{_State_} = 1;

    $expatnb->{FinalHandler} = $final if defined $final;

    return $expatnb;
}
159
# Parse SOURCE (the first argument) with a fresh XML::Parser::Expat object.
#
# Every option stored in the parser object, except the keys listed in
# Non_Expat_Options, is passed to the Expat constructor, followed by any
# OPT => VALUE pairs given after SOURCE.  The Init handler (if any) is called
# before parsing.  When the Expat parse returns a true value and a Final
# handler is installed, the Final handler is called in the caller's context
# (list context for a list call, scalar context otherwise) and its result is
# returned; otherwise the result of the Expat parse is returned in scalar
# context and an empty list in list context.
#
# Any exception is re-thrown unchanged.  The Expat object is released on
# every exit path, including when a handler or the setup calls die.
sub parse {
    my $self = shift;
    my $arg  = shift;

    # Iterate with keys() rather than each(): keys() resets the hash
    # iterator, so a half-finished each() loop elsewhere on this object
    # cannot make options silently go missing.
    my $non_expat = $self->{Non_Expat_Options} || {};
    my @expat_options;
    for my $key (keys %{$self}) {
        next if exists $non_expat->{$key};
        push @expat_options, $key, $self->{$key};
    }

    my $expat = XML::Parser::Expat->new(@expat_options, @_);

    # Work on a copy so that removing Init/Final does not touch the
    # handlers stored in the parser object.
    my %handlers = %{ $self->{Handlers} };
    my $init     = delete $handlers{Init};
    my $final    = delete $handlers{Final};

    # wantarray inside the eval below would report the context of the
    # eval, not of our caller, so capture it first.
    my $want_list = wantarray;

    my @result = ();
    my $result;

    my $ok = eval {
        $expat->setHandlers(%handlers);

        if ($self->{Base}) {
            $expat->base($self->{Base});
        }

        $init->($expat) if defined $init;

        $result = $expat->parse($arg);

        if ($result and defined $final) {
            if ($want_list) {
                @result = $final->($expat);
            }
            else {
                $result = $final->($expat);
            }
        }
        1;
    };
    my $err = $@;    # save before release() gets a chance to clobber it

    $expat->release;
    die $err unless $ok;

    return unless defined $want_list;
    return $want_list ? @result : $result;
}
209
# Backwards compatible alias: forwards all arguments to parse() and returns
# whatever parse() returns, in the caller's context.
sub parsestring {
    my $self = shift;
    return $self->parse(@_);
}
214
# Open FILE (the first argument) for reading in binary mode and hand the
# open handle, plus any remaining OPT => VALUE arguments, to parse().
#
# Croaks when the file cannot be opened.  The handle is closed whether or
# not parse() throws; an exception from parse() is re-thrown unchanged.
# Returns what parse() returns (parse() is called in list context for a
# list call and in scalar context otherwise).
#
# FILE is always treated as a literal file name: it is opened with the
# three-argument form of open, so names such as "cmd |" or ">file" are not
# interpreted as pipes or output redirections.  To parse the output of a
# command, open the pipe yourself and pass the handle to parse().
#
# Side effect: $self->{Base} is set to FILE and stays set after the call.
sub parsefile {
    my $self = shift;
    my $file = shift;

    open(my $fh, '<', $file) or croak "Couldn't open $file:\n$!";
    binmode($fh);

    $self->{Base} = $file;

    # wantarray inside the eval below would report the context of the
    # eval, not of our caller, so capture it first.
    my $want_list = wantarray;

    my @ret;
    my $ret;
    my $ok = eval {
        if ($want_list) {
            @ret = $self->parse($fh, @_);
        }
        else {
            $ret = $self->parse($fh, @_);
        }
        1;
    };
    my $err = $@;    # save before close() gets a chance to clobber it

    close($fh);
    die $err unless $ok;

    return unless defined $want_list;
    return $want_list ? @ret : $ret;
}
243
# Bootstrap external entity handler.
#
# On first use it tries to load XML/Parser/LWPExternEnt.pl, which provides
# the real lwp_ext_ent_handler and also loads the URI and LWP modules.  On
# success the LWP handlers are installed on the object in $_[0] and control
# passes to lwp_ext_ent_handler with the original arguments.  On failure a
# warning is printed, $LWP_load_failed is set, and the file based handlers
# are installed and used instead (as if NoLWP had been given).
sub initial_ext_ent_handler {
    if (not $LWP_load_failed) {
        local $^W = 0;

        my $loaded = eval { require('XML/Parser/LWPExternEnt.pl') };

        if ($loaded) {
            $_[0]->setHandlers(
                ExternEnt    => \&lwp_ext_ent_handler,
                ExternEntFin => \&lwp_ext_ent_cleanup,
            );

            goto &lwp_ext_ent_handler;
        }

        # Failed to load lwp handler, act as if NoLWP
        $LWP_load_failed = 1;

        warn join '',
            "Couldn't load LWP based external entity handler\n",
            "Switching to file-based external entity handler\n",
            " (To avoid this message, use NoLWP option to XML::Parser)\n";
    }

    $_[0]->setHandlers(
        ExternEnt    => \&file_ext_ent_handler,
        ExternEntFin => \&file_ext_ent_cleanup,
    );
    goto &file_ext_ent_handler;
}
278
# File based external entity handler.
#
# Arguments: the Expat object, the current base (may be undef) and the
# system id of the entity.  A relative system id is resolved against the
# base by replacing the last path component of the base.
#
# Returns an open, read-only IO::File handle for the entity.  On failure it
# appends a description to $xp->{ErrorMessage} and returns undef.
#
# On success the previous base and the handle are pushed onto
# $xp->{_BaseStack} and $xp->{_FhStack}, and the Expat base is set to the
# resolved path.
sub file_ext_ent_handler {
    my ($xp, $base, $path) = @_;

    # Prepend base only for relative paths
    if (defined($base) and $path !~ m!^(?:[\\/]|\w+:)!) {
        my $newpath = $base;
        $newpath =~ s![^\\/:]*$!$path!;
        $path = $newpath;
    }

    # Kept for backward compatibility: these system ids have always been
    # rejected with this message.
    if (   $path =~ /^\s*[|>+]/
        or $path =~ /\|\s*$/)
    {
        $xp->{ErrorMessage}
            .= "System ID ($path) contains Perl IO control characters";
        return undef;
    }

    # The system id comes from the document being parsed, so open it with
    # an explicit read mode.  Without a mode IO::File falls back to the
    # two-argument form of open, which would treat "-" as STDIN and
    # "<&=N" as a request to duplicate a file descriptor.
    require IO::File;
    my $fh = IO::File->new($path, '<');
    unless (defined $fh) {
        $xp->{ErrorMessage} .= "Failed to open $path:\n$!";
        return undef;
    }

    $xp->{_BaseStack} ||= [];
    $xp->{_FhStack}   ||= [];

    push(@{ $xp->{_BaseStack} }, $base);
    push(@{ $xp->{_FhStack} },   $fh);

    $xp->base($path);

    return $fh;
}
317
# Counterpart of file_ext_ent_handler: closes the most recently pushed
# entity file handle and restores the base that was saved with it.
sub file_ext_ent_cleanup {
    my ($xp) = @_;

    pop(@{ $xp->{_FhStack} })->close;

    $xp->base(pop(@{ $xp->{_BaseStack} }));
}
327
3281;
329
330__END__
331
332=head1 NAME
333
334XML::Parser - A perl module for parsing XML documents
335
336=head1 SYNOPSIS
337
338 use XML::Parser;
339
340 $p1 = XML::Parser->new(Style => 'Debug');
341 $p1->parsefile('REC-xml-19980210.xml');
342 $p1->parse('<foo id="me">Hello World</foo>');
343
344 # Alternative
345 $p2 = XML::Parser->new(Handlers => {Start => \&handle_start,
346 End => \&handle_end,
347 Char => \&handle_char});
348 $p2->parse($socket);
349
350 # Another alternative
351 $p3 = XML::Parser->new(ErrorContext => 2);
352
353 $p3->setHandlers(Char => \&text,
354 Default => \&other);
355
356 open(FOO, 'xmlgenerator |');
357 $p3->parse(*FOO, ProtocolEncoding => 'ISO-8859-1');
358 close(FOO);
359
360 $p3->parsefile('junk.xml', ErrorContext => 3);
361
362=begin man
363.ds PI PI
364
365=end man
366
367=head1 DESCRIPTION
368
369This module provides ways to parse XML documents. It is built on top of
370L<XML::Parser::Expat>, which is a lower level interface to James Clark's
371expat library. Each call to one of the parsing methods creates a new
372instance of XML::Parser::Expat which is then used to parse the document.
373Expat options may be provided when the XML::Parser object is created.
374These options are then passed on to the Expat object on each parse call.
375They can also be given as extra arguments to the parse methods, in which
376case they override options given at XML::Parser creation time.
377
378The behavior of the parser is controlled either by the C<Style> and/or
379C<Handlers> options (see L</STYLES> and L</HANDLERS>), or by the L</setHandlers> method. These all provide
380mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat.
381If neither C<Style> nor C<Handlers> are specified, then parsing just
382checks the document for being well-formed.
383
384When underlying handlers get called, they receive as their first parameter
385the I<Expat> object, not the Parser object.
386
387=head1 METHODS
388
389=over 4
390
391=item new
392
393This is a class method, the constructor for XML::Parser. Options are passed
394as keyword value pairs. Recognized options are:
395
396=over 4
397
398=item * Style
399
400This option provides an easy way to create a given style of parser. The
401built in styles are: L<"Debug">, L<"Subs">, L<"Tree">, L<"Objects">,
402and L<"Stream">. These are all defined in separate packages under
403C<XML::Parser::Style::*>, and you can find further documentation for
404each style both below, and in those packages.
405
406Custom styles can be provided by giving a full package name containing
407at least one '::'. This package should then have subs defined for each
408handler it wishes to have installed. See L<"STYLES"> below
409for a discussion of each built in style.
410
411=item * Handlers
412
413When provided, this option should be an anonymous hash containing as
414keys the type of handler and as values a sub reference to handle that
415type of event. All the handlers get passed as their 1st parameter the
416instance of expat that is parsing the document. Further details on
417handlers can be found in L<"HANDLERS">. Any handler set here
418overrides the corresponding handler set with the Style option.
419
420=item * Pkg
421
422Some styles will refer to subs defined in this package. If not provided,
423it defaults to the package which called the constructor.
424
425=item * ErrorContext
426
427This is an Expat option. When this option is defined, errors are reported
428in context. The value should be the number of lines to show on either side
429of the line in which the error occurred.
430
431=item * ProtocolEncoding
432
433This is an Expat option. This sets the protocol encoding name. It defaults
434to none. The built-in encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and
435C<US-ASCII>. Other encodings may be used if they have encoding maps in one
436of the directories in the @Encoding_Path list. Check L<"ENCODINGS"> for
437more information on encoding maps. Setting the protocol encoding overrides
438any encoding in the XML declaration.
439
440=item * Namespaces
441
442This is an Expat option. If this is set to a true value, then namespace
443processing is done during the parse. See L<XML::Parser::Expat/"Namespaces">
444for further discussion of namespace processing.
445
446=item * NoExpand
447
448This is an Expat option. Normally, the parser will try to expand references
449to entities defined in the internal subset. If this option is set to a true
450value, and a default handler is also set, then the default handler will be
451called when an entity reference is seen in text. This has no effect if a
452default handler has not been registered, and it has no effect on the expansion
453of entity references inside attribute values.
454
455=item * Stream_Delimiter
456
457This is an Expat option. It takes a string value. When this string is found
458alone on a line while parsing from a stream, then the parse is ended as if it
459saw an end of file. The intended use is with a stream of xml documents in a
460MIME multipart format. The string should not contain a trailing newline.
461
462=item * ParseParamEnt
463
464This is an Expat option. Unless standalone is set to "yes" in the XML
465declaration, setting this to a true value allows the external DTD to be read,
466and parameter entities to be parsed and expanded.
467
468=item * NoLWP
469
470This option has no effect if the ExternEnt or ExternEntFin handlers are
471directly set. Otherwise, if true, it forces the use of a file based external
472entity handler.
473
474=item * Non_Expat_Options
475
476If provided, this should be an anonymous hash whose keys are options that
477shouldn't be passed to Expat. This should only be of concern to those
478subclassing XML::Parser.
479
480=back
481
482=item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
483
484This method registers handlers for various parser events. It overrides any
485previous handlers registered through the Style or Handlers options or through
486earlier calls to setHandlers. By providing a false or undefined value as
487the handler, the existing handler can be unset.
488
489This method returns a list of type, handler pairs corresponding to the
490input. The handlers returned are the ones that were in effect prior to
491the call.
492
493See a description of the handler types in L<"HANDLERS">.
494
495=item parse(SOURCE [, OPT => OPT_VALUE [...]])
496
497The SOURCE parameter should either be a string containing the whole XML
498document, or it should be an open IO::Handle. Constructor options to
499XML::Parser::Expat given as keyword-value pairs may follow the SOURCE
500parameter. These override, for this call, any options or attributes passed
501through from the XML::Parser instance.
502
503A die call is thrown if a parse error occurs. Otherwise it will return 1
504or whatever is returned from the B<Final> handler, if one is installed.
505In other words, what parse may return depends on the style.
506
507=item parsestring
508
509This is just an alias for parse for backwards compatibility.
510
511=item parsefile(FILE [, OPT => OPT_VALUE [...]])
512
513Open FILE for reading, then call parse with the open handle. The file
514is closed no matter how parse returns. Returns what parse returns.
515
516=item parse_start([ OPT => OPT_VALUE [...]])
517
518Create and return a new instance of XML::Parser::ExpatNB. Constructor
519options may be provided. If an init handler has been provided, it is
520called before returning the ExpatNB object. Documents are parsed by
521making incremental calls to the parse_more method of this object, which
522takes a string. A single call to the parse_done method of this object,
523which takes no arguments, indicates that the document is finished.
524
525If there is a final handler installed, it is executed by the parse_done
526method before returning and the parse_done method returns whatever is
527returned by the final handler.
528
529=back
530
531=head1 HANDLERS
532
533Expat is an event based parser. As the parser recognizes parts of the
534document (say the start or end tag for an XML element), then any handlers
535registered for that type of an event are called with suitable parameters.
536All handlers receive an instance of XML::Parser::Expat as their first
537argument. See L<XML::Parser::Expat/"METHODS"> for a discussion of the
538methods that can be called on this object.
539
540=head2 Init (Expat)
541
542This is called just before the parsing of the document starts.
543
544=head2 Final (Expat)
545
546This is called just after parsing has finished, but only if no errors
547occurred during the parse. Parse returns what this returns.
548
549=head2 Start (Expat, Element [, Attr, Val [,...]])
550
551This event is generated when an XML start tag is recognized. Element is the
552name of the XML element type that is opened with the start tag. The Attr &
553Val pairs are generated for each attribute in the start tag.
554
555=head2 End (Expat, Element)
556
557This event is generated when an XML end tag is recognized. Note that
558an XML empty tag (<foo/>) generates both a start and an end event.
559
560=head2 Char (Expat, String)
561
562This event is generated when non-markup is recognized. The non-markup
563sequence of characters is in String. A single non-markup sequence of
564characters may generate multiple calls to this handler. Whatever the
565encoding of the string in the original document, this is given to the
566handler in UTF-8.
567
568=head2 Proc (Expat, Target, Data)
569
570This event is generated when a processing instruction is recognized.
571
572=head2 Comment (Expat, Data)
573
574This event is generated when a comment is recognized.
575
576=head2 CdataStart (Expat)
577
578This is called at the start of a CDATA section.
579
580=head2 CdataEnd (Expat)
581
582This is called at the end of a CDATA section.
583
584=head2 Default (Expat, String)
585
586This is called for any characters that don't have a registered handler.
587This includes both characters that are part of markup for which no
588events are generated (markup declarations) and characters that
589could generate events, but for which no handler has been registered.
590
591Whatever the encoding in the original document, the string is returned to
592the handler in UTF-8.
593
594=head2 Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation)
595
596This is called for a declaration of an unparsed entity. Entity is the name
597of the entity. Base is the base to be used for resolving a relative URI.
598Sysid is the system id. Pubid is the public id. Notation is the notation
599name. Base and Pubid may be undefined.
600
601=head2 Notation (Expat, Notation, Base, Sysid, Pubid)
602
603This is called for a declaration of notation. Notation is the notation name.
604Base is the base to be used for resolving a relative URI. Sysid is the system
605id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.
606
607=head2 ExternEnt (Expat, Base, Sysid, Pubid)
608
609This is called when an external entity is referenced. Base is the base to be
610used for resolving a relative URI. Sysid is the system id. Pubid is the public
611id. Base, and Pubid may be undefined.
612
613This handler should either return a string, which represents the contents of
614the external entity, or return an open filehandle that can be read to obtain
615the contents of the external entity, or return undef, which indicates the
616external entity couldn't be found and will generate a parse error.
617
618If an open filehandle is returned, it must be returned as either a glob
619(*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).
620
621A default handler is installed for this event. The default handler is
622XML::Parser::lwp_ext_ent_handler unless the NoLWP option was provided with
623a true value, otherwise XML::Parser::file_ext_ent_handler is the default
624handler for external entities. Even without the NoLWP option, if the
625URI or LWP modules are missing, the file based handler ends up being used
626after giving a warning on the first external entity reference.
627
628The LWP external entity handler will use proxies defined in the environment
629(http_proxy, ftp_proxy, etc.).
630
631Please note that the LWP external entity handler reads the entire
632entity into a string and returns it, whereas the file handler opens a
633filehandle.
634
635Also note that the file external entity handler will likely choke on
636absolute URIs or file names that don't fit the conventions of the local
637operating system.
638
639The expat base method can be used to set a basename for
640relative pathnames. If no basename is given, or if the basename is itself
641a relative name, then it is relative to the current working directory.
642
643=head2 ExternEntFin (Expat)
644
645This is called after parsing an external entity. It's not called unless
646an ExternEnt handler is also set. There is a default handler installed
647that pairs with the default ExternEnt handler.
648
649If you're going to install your own ExternEnt handler, then you should
650set (or unset) this handler too.
651
652=head2 Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam)
653
654This is called when an entity is declared. For internal entities, the Val
655parameter will contain the value and the remaining three parameters will be
656undefined. For external entities, the Val parameter will be undefined, the
657Sysid parameter will have the system id, the Pubid parameter will have the
658public id if it was provided (it will be undefined otherwise), the Ndata
659parameter will contain the notation for unparsed entities. If this is a
660parameter entity declaration, then the IsParam parameter is true.
661
662Note that this handler and the Unparsed handler above overlap. If both are
663set, then this handler will not be called for unparsed entities.
664
665=head2 Element (Expat, Name, Model)
666
667The element handler is called when an element declaration is found. Name
668is the element name, and Model is the content model as an XML::Parser::Content
669object. See L<XML::Parser::Expat/"XML::Parser::ContentModel Methods">
670for methods available for this class.
671
672=head2 Attlist (Expat, Elname, Attname, Type, Default, Fixed)
673
674This handler is called for each attribute in an ATTLIST declaration.
675So an ATTLIST declaration that has multiple attributes will generate multiple
676calls to this handler. The Elname parameter is the name of the element with
677which the attribute is being associated. The Attname parameter is the name
678of the attribute. Type is the attribute type, given as a string. Default is
679the default value, which will either be "#REQUIRED", "#IMPLIED" or a quoted
680string (i.e. the returned string will begin and end with a quote character).
681If Fixed is true, then this is a fixed attribute.
682
683=head2 Doctype (Expat, Name, Sysid, Pubid, Internal)
684
685This handler is called for DOCTYPE declarations. Name is the document type
686name. Sysid is the system id of the document type, if it was provided,
687otherwise it's undefined. Pubid is the public id of the document type,
688which will be undefined if no public id was given. Internal is the internal
689subset, given as a string. If there was no internal subset, it will be
690undefined. Internal will contain all whitespace, comments, processing
691instructions, and declarations seen in the internal subset. The declarations
692will be there whether or not they have been processed by another handler
693(except for unparsed entities processed by the Unparsed handler). However,
694comments and processing instructions will not appear if they've been processed
695by their respective handlers.
696
697=head2 DoctypeFin (Expat)
698
699This handler is called after parsing of the DOCTYPE declaration has finished,
700including any internal or external DTD declarations.
701
702=head2 XMLDecl (Expat, Version, Encoding, Standalone)
703
704This handler is called for xml declarations. Version is a string containing
705the version. Encoding is either undefined or contains an encoding string.
706Standalone will be true, false, or undefined if the standalone attribute
707is yes, no, or not given, respectively.
708
709=head1 STYLES
710
711=head2 Debug
712
713This just prints out the document in outline form. Nothing special is
714returned by parse.
715
716=head2 Subs
717
718Each time an element starts, a sub by that name in the package specified
719by the Pkg option is called with the same parameters that the Start
720handler gets called with.
721
722Each time an element ends, a sub with that name appended with an underscore
723("_"), is called with the same parameters that the End handler gets called
724with.
725
726Nothing special is returned by parse.
727
728=head2 Tree
729
730Parse will return a parse tree for the document. Each node in the tree
731takes the form of a tag, content pair. Text nodes are represented with
732a pseudo-tag of "0" and the string that is their content. For elements,
733the content is an array reference. The first item in the array is a
734(possibly empty) hash reference containing attributes. The remainder of
735the array is a sequence of tag-content pairs representing the content
736of the element.
737
738So for example the result of parsing:
739
740 <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
741
742would be:
743
744 Tag Content
745 ==================================================================
746 [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]],
747 bar, [ {}, 0, "Howdy", ref, [{}]],
748 0, "do"
749 ]
750 ]
751
752The root document "foo", has 3 children: a "head" element, a "bar"
753element and the text "do". After the empty attribute hash, these are
754represented in its contents by 3 tag-content pairs.
755
756=head2 Objects
757
758This is similar to the Tree style, except that a hash object is created for
759each element. The corresponding object will be in the class whose name
760is created by appending "::" and the element name to the package set with
761the Pkg option. Non-markup text will be in the ::Characters class. The
762contents of the corresponding object will be in an anonymous array that
763is the value of the Kids property for that object.
764
765=head2 Stream
766
767This style also uses the Pkg package. If none of the subs that this
768style looks for is there, then the effect of parsing with this style is
769to print a canonical copy of the document without comments or declarations.
770All the subs receive as their 1st parameter the Expat instance for the
771document they're parsing.
772
773It looks for the following routines:
774
775=over 4
776
777=item * StartDocument
778
779Called at the start of the parse.
780
781=item * StartTag
782
783Called for every start tag with a second parameter of the element type. The $_
784variable will contain a copy of the tag and the %_ variable will contain
785attribute values supplied for that element.
786
787=item * EndTag
788
789Called for every end tag with a second parameter of the element type. The $_
790variable will contain a copy of the end tag.
791
792=item * Text
793
794Called just before start or end tags with accumulated non-markup text in
795the $_ variable.
796
797=item * PI
798
799Called for processing instructions. The $_ variable will contain a copy of
800the PI and the target and data are sent as 2nd and 3rd parameters
801respectively.
802
803=item * EndDocument
804
805Called at conclusion of the parse.
806
807=back
808
809=head1 ENCODINGS
810
811XML documents may be encoded in character sets other than Unicode as
812long as they may be mapped into the Unicode character set. Expat has
813further restrictions on encodings. Read the xmlparse.h header file in
814the expat distribution to see details on these restrictions.
815
816Expat has built-in encodings for: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and
817C<US-ASCII>. Encodings are set either through the XML declaration
818encoding attribute or through the ProtocolEncoding option to XML::Parser
819or XML::Parser::Expat.
820
821For encodings other than the built-ins, expat calls the function
822load_encoding in the Expat package with the encoding name. This function
823looks for a file in the path list @XML::Parser::Expat::Encoding_Path, that
824matches the lower-cased name with a '.enc' extension. The first one it
825finds, it loads.
826
827If you wish to build your own encoding maps, check out the XML::Encoding
828module from CPAN.
829
830=head1 AUTHORS
831
832Larry Wall <F<larry@wall.org>> wrote version 1.0.
833
834Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API
835for this version (2.x), provided documentation,
836and added some standard package features.
837
838Matt Sergeant <F<matt@sergeant.org>> is now maintaining XML::Parser
839
840=cut
Note: See TracBrowser for help on using the repository browser.