source: trunk/gsdl3/lib/perl/cpan/XML/Writer.pm@ 4183

Last change on this file since 4183 was 4183, checked in by kjdon, 21 years ago

XML writer from cpan. used by convert_coll_from_gs2.pl. have added in another function to output text as XML ie no escaping of < > &

  • Property svn:keywords set to Author Date Id Revision
File size: 30.5 KB
Line 
1########################################################################
2# Writer.pm - write an XML document.
3# Copyright (c) 1999 by Megginson Technologies.
4# No warranty. Commercial and non-commercial use freely permitted.
5#
6# $Id: Writer.pm 4183 2003-04-17 02:54:36Z kjdon $
7########################################################################
8
9package XML::Writer;
10
11require 5.004;
12
13use strict;
14use vars qw($VERSION);
15use Carp;
16use IO;
17
18$VERSION = "0.4";
19
20
21
22
23########################################################################
24# Constructor.
25########################################################################
26
27#
28# Public constructor.
29#
30# This actually does most of the work of the module: it defines closures
31# for all of the real processing, and selects the appropriate closures
32# to use based on the value of the UNSAFE parameter. The actual methods
33# are just stubs.
34#
35sub new {
36 my ($class, %params) = (@_);
37
38 # If the user wants namespaces,
39 # intercept the request here; it will
40 # come back to this constructor
41 # from within XML::Writer::Namespaces::new()
42 if ($params{NAMESPACES}) {
43 delete $params{NAMESPACES};
44 return new XML::Writer::Namespaces(%params);
45 }
46
47 # Set up $self and basic parameters
48 my $self;
49 my $output;
50 my $unsafe = $params{UNSAFE};
51 my $newlines = $params{NEWLINES};
52 my $dataMode = $params{DATA_MODE};
53 my $dataIndent = $params{DATA_INDENT};
54
55 # If the NEWLINES parameter is specified,
56 # set the $nl variable appropriately
57 my $nl = '';
58 if ($newlines) {
59 $nl = "\n";
60 }
61
62
63 # Parse variables
64 my @elementStack = ();
65 my $elementLevel = 0;
66 my %seen = ();
67
68 my $hasData = 0;
69 my @hasDataStack = ();
70 my $hasElement = 0;
71 my @hasElementStack = ();
72
73 #
74 # Private method to show attributes.
75 #
76 my $showAttributes = sub {
77 my $atts = $_[0];
78 my $i = 1;
79 while ($atts->[$i]) {
80 my $aname = $atts->[$i++];
81 my $value = _escapeLiteral($atts->[$i++]);
82 $output->print(" $aname=\"$value\"");
83 }
84 };
85
86 # Method implementations: the SAFE_
87 # versions perform error checking
88 # and then call the regular ones.
89 my $end = sub {
90 $output->print("\n");
91 };
92
93 my $SAFE_end = sub {
94 if (!$seen{ELEMENT}) {
95 croak("Document cannot end without a document element");
96 } elsif ($elementLevel > 0) {
97 croak("Document ended with unmatched start tag(s): @elementStack");
98 } else {
99 @elementStack = ();
100 $elementLevel = 0;
101 %seen = ();
102 &{$end};
103 }
104 };
105
106 my $xmlDecl = sub {
107 my ($encoding, $standalone) = (@_);
108 if ($standalone && $standalone ne 'no') {
109 $standalone = 'yes';
110 }
111 $encoding = "UTF-8" unless $encoding;
112 $output->print("<?xml version=\"1.0\"");
113 if ($encoding) {
114 $output->print(" encoding=\"$encoding\"");
115 }
116 if ($standalone) {
117 $output->print(" standalone=\"$standalone\"");
118 }
119 $output->print("?>\n");
120 };
121
122 my $SAFE_xmlDecl = sub {
123 if ($seen{ANYTHING}) {
124 croak("The XML declaration is not the first thing in the document");
125 } else {
126 $seen{ANYTHING} = 1;
127 $seen{XMLDECL} = 1;
128 &{$xmlDecl};
129 }
130 };
131
132 my $pi = sub {
133 my ($target, $data) = (@_);
134 if ($data) {
135 $output->print("<?$target $data?>");
136 } else {
137 $output->print("<?$target?>");
138 }
139 if ($elementLevel == 0) {
140 $output->print("\n");
141 }
142 };
143
144 my $SAFE_pi = sub {
145 my ($name, $data) = (@_);
146 $seen{ANYTHING} = 1;
147 if ($name =~ /xml/i) {
148 carp("Processing instruction target begins with 'xml'");
149 }
150
151 if ($name =~ /\?\>/ || $data =~ /\?\>/) {
152 croak("Processing instruction may not contain '?>'");
153 } else {
154 &{$pi};
155 }
156 };
157
158 my $comment = sub {
159 my $data = $_[0];
160 $output->print("<!-- $data -->");
161 if ($elementLevel == 0) {
162 $output->print("\n");
163 }
164 };
165
166 my $SAFE_comment = sub {
167 my $data = $_[0];
168 if ($data =~ /--/) {
169 carp("Interoperability problem: \"--\" in comment text");
170 }
171
172 if ($data =~ /-->/) {
173 croak("Comment may not contain '-->'");
174 } else {
175 $seen{ANYTHING} = 1;
176 &{$comment};
177 }
178 };
179
180 my $doctype = sub {
181 my ($name, $publicId, $systemId) = (@_);
182 $output->print("<!DOCTYPE $name");
183 if ($publicId) {
184 $output->print(" PUBLIC \"$publicId\" \"$systemId\"");
185 } elsif ($systemId) {
186 $output->print(" SYSTEM \"$systemId\"");
187 }
188 $output->print(">\n");
189 };
190
191 my $SAFE_doctype = sub {
192 my $name = $_[0];
193 if ($seen{DOCTYPE}) {
194 croak("Attempt to insert second DOCTYPE declaration");
195 } elsif ($seen{ELEMENT}) {
196 croak("The DOCTYPE declaration must come before the first start tag");
197 } else {
198 $seen{ANYTHING} = 1;
199 $seen{DOCTYPE} = $name;
200 &{$doctype};
201 }
202 };
203
204 my $startTag = sub {
205 my $name = $_[0];
206 if ($dataMode) {
207 $output->print("\n");
208 $output->print(" " x ($elementLevel * $dataIndent));
209 }
210 $elementLevel++;
211 push @elementStack, $name;
212 $output->print("<$name");
213 &{$showAttributes}(\@_);
214 $output->print("$nl>");
215 if ($dataMode) {
216 $hasElement = 1;
217 push @hasDataStack, $hasData;
218 $hasData = 0;
219 push @hasElementStack, $hasElement;
220 $hasElement = 0;
221 }
222 };
223
224 my $SAFE_startTag = sub {
225 my $name = $_[0];
226
227 _checkAttributes(\@_);
228
229 if ($seen{ELEMENT} && $elementLevel == 0) {
230 croak("Attempt to insert start tag after close of document element");
231 } elsif ($elementLevel == 0 && $seen{DOCTYPE} && $name ne $seen{DOCTYPE}) {
232 croak("Document element is \"$name\", but DOCTYPE is \""
233 . $seen{DOCTYPE}
234 . "\"");
235 } elsif ($dataMode && $hasData) {
236 croak("Mixed content not allowed in data mode: element $name");
237 } else {
238 $seen{ANYTHING} = 1;
239 $seen{ELEMENT} = 1;
240 &{$startTag};
241 }
242 };
243
244 my $emptyTag = sub {
245 my $name = $_[0];
246 if ($dataMode) {
247 $output->print("\n");
248 $output->print(" " x ($elementLevel * $dataIndent));
249 }
250 $output->print("<$name");
251 &{$showAttributes}(\@_);
252 $output->print("$nl />");
253 if ($dataMode) {
254 $hasElement = 1;
255 }
256 };
257
258 my $SAFE_emptyTag = sub {
259 my $name = $_[0];
260
261 _checkAttributes(\@_);
262
263 if ($seen{ELEMENT} && $elementLevel == 0) {
264 croak("Attempt to insert empty tag after close of document element");
265 } elsif ($elementLevel == 0 && $seen{DOCTYPE} && $name ne $seen{DOCTYPE}) {
266 croak("Document element is \"$name\", but DOCTYPE is \""
267 . $seen{DOCTYPE}
268 . "\"");
269 } elsif ($dataMode && $hasData) {
270 croak("Mixed content not allowed in data mode: element $name");
271 } else {
272 $seen{ANYTHING} = 1;
273 $seen{ELEMENT} = 1;
274 &{$emptyTag};
275 }
276 };
277
278 my $endTag = sub {
279 my $name = $_[0];
280 my $currentName = pop @elementStack;
281 $name = $currentName unless $name;
282 $elementLevel--;
283 if ($dataMode && $hasElement) {
284 $output->print("\n");
285 $output->print(" " x ($elementLevel * $dataIndent));
286 }
287 $output->print("</$name$nl>");
288 if ($dataMode) {
289 $hasData = pop @hasDataStack;
290 $hasElement = pop @hasElementStack;
291 }
292 };
293
294 my $SAFE_endTag = sub {
295 my $name = $_[0];
296 my $oldName = $elementStack[$#elementStack];
297 if ($elementLevel <= 0) {
298 croak("End tag \"$name\" does not close any open element");
299 } elsif ($name && ($name ne $oldName)) {
300 croak("Attempt to end element \"$oldName\" with \"$name\" tag");
301 } else {
302 &{$endTag};
303 }
304 };
305
306 my $characters = sub {
307 my $data = $_[0];
308 if ($data =~ /[\&\<\>]/) {
309 $data =~ s/\&/\&amp\;/g;
310 $data =~ s/\</\&lt\;/g;
311 $data =~ s/\>/\&gt\;/g;
312 }
313 $output->print($data);
314 $hasData = 1;
315 };
316
317 my $SAFE_characters = sub {
318 if ($elementLevel < 1) {
319 croak("Attempt to insert characters outside of document element");
320 } elsif ($dataMode && $hasElement) {
321 croak("Mixed content not allowed in data mode: characters");
322 } else {
323 &{$characters};
324 }
325 };
326
327 my $charactersXML = sub {
328 my $data = $_[0];
329 # if ($data =~ /[\&\<\>]/) {
330 # $data =~ s/\&/\&amp\;/g;
331 # $data =~ s/\</\&lt\;/g;
332 # $data =~ s/\>/\&gt\;/g;
333 # }
334 $output->print($data);
335 $hasData = 1;
336 };
337
338 my $SAFE_charactersXML = sub {
339 if ($elementLevel < 1) {
340 croak("Attempt to insert characters outside of document element");
341 } elsif ($dataMode && $hasElement) {
342 croak("Mixed content not allowed in data mode: characters");
343 } else {
344 &{$charactersXML};
345 }
346 };
347
348
349 # Assign the correct closures based on
350 # the UNSAFE parameter
351 if ($unsafe) {
352 $self = {'END' => $end,
353 'XMLDECL' => $xmlDecl,
354 'PI' => $pi,
355 'COMMENT' => $comment,
356 'DOCTYPE' => $doctype,
357 'STARTTAG' => $startTag,
358 'EMPTYTAG' => $emptyTag,
359 'ENDTAG' => $endTag,
360 'CHARACTERS' => $characters,
361 'CHARACTERSXML' => $charactersXML};
362 } else {
363 $self = {'END' => $SAFE_end,
364 'XMLDECL' => $SAFE_xmlDecl,
365 'PI' => $SAFE_pi,
366 'COMMENT' => $SAFE_comment,
367 'DOCTYPE' => $SAFE_doctype,
368 'STARTTAG' => $SAFE_startTag,
369 'EMPTYTAG' => $SAFE_emptyTag,
370 'ENDTAG' => $SAFE_endTag,
371 'CHARACTERS' => $SAFE_characters,
372 'CHARACTERSXML' => $SAFE_charactersXML};
373 }
374
375 # Query methods
376 $self->{'IN_ELEMENT'} = sub {
377 my ($ancestor) = (@_);
378 return $elementStack[$#elementStack] eq $ancestor;
379 };
380
381 $self->{'WITHIN_ELEMENT'} = sub {
382 my ($ancestor) = (@_);
383 my $el;
384 foreach $el (@elementStack) {
385 return 1 if $el eq $ancestor;
386 }
387 return 0;
388 };
389
390 $self->{'CURRENT_ELEMENT'} = sub {
391 return $elementStack[$#elementStack];
392 };
393
394 $self->{'ANCESTOR'} = sub {
395 my ($n) = (@_);
396 return $elementStack[$#elementStack-$n];
397 };
398
399 # Set and get the output destination.
400 $self->{'GETOUTPUT'} = sub {
401 return $output;
402 };
403
404 $self->{'SETOUTPUT'} = sub {
405 my $newOutput = $_[0];
406 # If there is no OUTPUT parameter,
407 # use standard output
408 unless ($newOutput) {
409 $newOutput = new IO::Handle();
410 $newOutput->fdopen(fileno(STDOUT), "w") ||
411 croak("Cannot write to standard output");
412 }
413 $output = $newOutput;
414 };
415
416 $self->{'SETDATAMODE'} = sub {
417 $dataMode = $_[0];
418 };
419
420 $self->{'GETDATAMODE'} = sub {
421 return $dataMode;
422 };
423
424 $self->{'SETDATAINDENT'} = sub {
425 $dataIndent = $_[0];
426 };
427
428 $self->{'GETDATAINDENT'} = sub {
429 return $dataIndent;
430 };
431
432 # Set the output.
433 &{$self->{'SETOUTPUT'}}($params{'OUTPUT'});
434
435 # Return the blessed object.
436 return bless $self, $class;
437}
438
439
440
441
442########################################################################
443# Public methods
444########################################################################
445
446#
447# Finish writing the document.
448#
449sub end {
450 my $self = shift;
451 &{$self->{END}};
452}
453
454#
455# Write an XML declaration.
456#
457sub xmlDecl {
458 my $self = shift;
459 &{$self->{XMLDECL}};
460}
461
462#
463# Write a processing instruction.
464#
465sub pi {
466 my $self = shift;
467 &{$self->{PI}};
468}
469
470#
471# Write a comment.
472#
473sub comment {
474 my $self = shift;
475 &{$self->{COMMENT}};
476}
477
478#
479# Write a DOCTYPE declaration.
480#
481sub doctype {
482 my $self = shift;
483 &{$self->{DOCTYPE}};
484}
485
486#
487# Write a start tag.
488#
489sub startTag {
490 my $self = shift;
491 &{$self->{STARTTAG}};
492}
493
494#
495# Write an empty tag.
496#
497sub emptyTag {
498 my $self = shift;
499 &{$self->{EMPTYTAG}};
500}
501
502#
503# Write an end tag.
504#
505sub endTag {
506 my $self = shift;
507 &{$self->{ENDTAG}};
508}
509
510#
511# Write a simple data element.
512#
513sub dataElement {
514 my ($self, $name, $data, %atts) = (@_);
515 $self->startTag($name, %atts);
516 $self->characters($data);
517 $self->endTag($name);
518}
519
520#
521# Write character data.
522#
523sub characters {
524 my $self = shift;
525 &{$self->{CHARACTERS}};
526}
527
528sub charactersXML {
529 my $self = shift;
530 &{$self->{CHARACTERSXML}};
531}
532
533#
534# Query the current element.
535#
536sub in_element {
537 my $self = shift;
538 return &{$self->{IN_ELEMENT}};
539}
540
541#
542# Query the ancestors.
543#
544sub within_element {
545 my $self = shift;
546 return &{$self->{WITHIN_ELEMENT}};
547}
548
549#
550# Get the name of the current element.
551#
552sub current_element {
553 my $self = shift;
554 return &{$self->{CURRENT_ELEMENT}};
555}
556
557#
558# Get the name of the numbered ancestor (zero-based).
559#
560sub ancestor {
561 my $self = shift;
562 return &{$self->{ANCESTOR}};
563}
564
565#
566# Get the current output destination.
567#
568sub getOutput {
569 my $self = shift;
570 return &{$self->{GETOUTPUT}};
571}
572
573
574#
575# Set the current output destination.
576#
577sub setOutput {
578 my $self = shift;
579 return &{$self->{SETOUTPUT}};
580}
581
582#
583# Set the current data mode (true or false).
584#
585sub setDataMode {
586 my $self = shift;
587 return &{$self->{SETDATAMODE}};
588}
589
590
591#
592# Get the current data mode (true or false).
593#
594sub getDataMode {
595 my $self = shift;
596 return &{$self->{GETDATAMODE}};
597}
598
599
600#
601# Set the current data indent step.
602#
603sub setDataIndent {
604 my $self = shift;
605 return &{$self->{SETDATAINDENT}};
606}
607
608
609#
610# Get the current data indent step.
611#
612sub getDataIndent {
613 my $self = shift;
614 return &{$self->{GETDATAINDENT}};
615}
616
617
618#
619# Empty stub.
620#
621sub addPrefix {
622}
623
624
625#
626# Empty stub.
627#
628sub removePrefix {
629}
630
631
632
633
634########################################################################
635# Private functions.
636########################################################################
637
638#
639# Private: check for duplicate attributes.
640# Note - this starts at $_[1], because $_[0] is assumed to be an
641# element name.
642#
643sub _checkAttributes {
644 my %anames;
645 my $i = 1;
646 while ($_[$i]) {
647 my $name = $_[$i];
648 $i += 2;
649 if ($anames{$name}) {
650 croak("Two attributes named \"$name\"");
651 } else {
652 $anames{$name} = 1;
653 }
654 }
655}
656
657#
658# Private: escape an attribute value literal.
659#
660sub _escapeLiteral {
661 my $data = $_[0];
662 if ($data =~ /[\&\<\>\"]/) {
663 $data =~ s/\&/\&amp\;/g;
664 $data =~ s/\</\&lt\;/g;
665 $data =~ s/\>/\&gt\;/g;
666 $data =~ s/\"/\&quot\;/g;
667 }
668 return $data;
669}
670
671
672
673
674########################################################################
675# XML::Writer::Namespaces - subclass for Namespace processing.
676########################################################################
677
678package XML::Writer::Namespaces;
679use strict;
680use vars qw(@ISA);
681use Carp;
682
683@ISA = qw(XML::Writer);
684
685#
686# Constructor
687#
688sub new {
689 my ($class, %params) = (@_);
690
691 my $unsafe = $params{UNSAFE};
692
693 # Snarf the prefix map, if any, and
694 # note the default prefix.
695 my %prefixMap = ();
696 if ($params{PREFIX_MAP}) {
697 %prefixMap = (%{$params{PREFIX_MAP}});
698 delete $params{PREFIX_MAP};
699 }
700 my $defaultPrefix = $prefixMap{''};
701 delete $prefixMap{''};
702
703 # Generate the reverse map for URIs
704 my %uriMap = ();
705 my $key;
706 foreach $key (keys(%prefixMap)) {
707 $uriMap{$prefixMap{$key}} = $key;
708 }
709
710 # Create an instance of the parent.
711 my $self = new XML::Writer(%params);
712
713 # Snarf the parent's methods that we're
714 # going to override.
715 my $OLD_startTag = $self->{STARTTAG};
716 my $OLD_emptyTag = $self->{EMPTYTAG};
717 my $OLD_endTag = $self->{ENDTAG};
718
719 # State variables
720 my $prefixCounter = 1;
721 my @nsDecls = ();
722 my $nsDecls = {};
723 my @nsDefaultDecl = ();
724 my $nsDefaultDecl = undef;
725 my @nsCopyFlag = ();
726 my $nsCopyFlag = 0;
727
728 #
729 # Push the current declaration state.
730 #
731 my $pushState = sub {
732 push @nsDecls, $nsDecls;
733 push @nsDefaultDecl, $nsDefaultDecl;
734 push @nsCopyFlag, $nsCopyFlag;
735 $nsCopyFlag = 0;
736 };
737
738
739 #
740 # Pop the current declaration state.
741 #
742 my $popState = sub {
743 $nsDecls = pop @nsDecls;
744 $nsDefaultDecl = pop @nsDefaultDecl;
745 $nsCopyFlag = pop @nsCopyFlag;
746 };
747
748 #
749 # Generate a new prefix.
750 #
751 my $genPrefix = sub {
752 my $prefix;
753 do {
754 $prefix = "__NS$prefixCounter";
755 $prefixCounter++;
756 } while ($uriMap{$prefix});
757 return $prefix;
758 };
759
760 #
761 # Perform namespace processing on a single name.
762 #
763 my $processName = sub {
764 my ($nameref, $atts, $attFlag) = (@_);
765 my ($uri, $local) = @{$$nameref};
766 my $prefix = $prefixMap{$uri};
767
768 # Is this an element name that matches
769 # the default NS?
770 if (!$attFlag && ($uri eq $defaultPrefix)) {
771 unless ($nsDefaultDecl) {
772 push @{$atts}, 'xmlns';
773 push @{$atts}, $uri;
774 $nsDefaultDecl = 1;
775 }
776 $$nameref = $local;
777
778 # Is there a straight-forward prefix?
779 } elsif ($prefix) {
780 unless ($nsDecls->{$uri}) {
781 # Copy on write (FIXME: duplicated)
782 unless ($nsCopyFlag) {
783 $nsCopyFlag = 1;
784 my %decls = (%{$nsDecls});
785 $nsDecls = \%decls;
786 }
787 $nsDecls->{$uri} = $prefix;
788 push @{$atts}, "xmlns:$prefix";
789 push @{$atts}, $uri;
790 }
791 $$nameref = "$prefix:$local";
792
793 } else {
794 $prefix = &{$genPrefix}();
795 $prefixMap{$uri} = $prefix;
796 $uriMap{$prefix} = $uri;
797 unless ($nsCopyFlag) {
798 $nsCopyFlag = 1;
799 my %decls = (%{$nsDecls});
800 $nsDecls = \%decls;
801 }
802 $nsDecls->{$uri} = $prefix;
803 push @{$atts}, "xmlns:$prefix";
804 push @{$atts}, $uri;
805 $$nameref = "$prefix:$local";
806 }
807 };
808
809
810 #
811 # Perform namespace processing on element and attribute names.
812 #
813 my $nsProcess = sub {
814 if (ref($_[0]->[0]) eq 'ARRAY') {
815 &{$processName}(\$_[0]->[0], $_[0], 0);
816 }
817 my $i = 1;
818 while ($_[0]->[$i]) {
819 if (ref($_[0]->[$i]) eq 'ARRAY') {
820 &{$processName}(\$_[0]->[$i], $_[0], 1);
821 }
822 $i += 2;
823 }
824 };
825
826 #
827 # Start tag, with NS processing
828 #
829 $self->{STARTTAG} = sub {
830 my $name = $_[0];
831 unless ($unsafe) {
832 _checkNSNames(\@_);
833 }
834 &{$pushState}();
835 &{$nsProcess}(\@_);
836 &{$OLD_startTag};
837 };
838
839
840 #
841 # Empty tag, with NS processing
842 #
843 $self->{EMPTYTAG} = sub {
844 unless ($unsafe) {
845 _checkNSNames(\@_);
846 }
847 &{$pushState}();
848 &{$nsProcess}(\@_);
849 &{$OLD_emptyTag};
850 &{$popState}();
851 };
852
853
854 #
855 # End tag, with NS processing
856 #
857 $self->{ENDTAG} = sub {
858 my $name = $_[0];
859 &{$nsProcess}(\@_);
860 &{$OLD_endTag};
861 &{$popState}();
862 };
863
864
865 #
866 # Processing instruction, but only if not UNSAFE.
867 #
868 unless ($unsafe) {
869 my $OLD_pi = $self->{PI};
870 $self->{PI} = sub {
871 my $target = $_[0];
872 if ($target =~ /:/) {
873 croak "PI target '$target' contains a colon.";
874 }
875 &{$OLD_pi};
876 }
877 };
878
879
880 #
881 # Add a prefix to the prefix map.
882 #
883 $self->{ADDPREFIX} = sub {
884 my ($uri, $prefix) = (@_);
885 if ($prefix) {
886 $prefixMap{$uri} = $prefix;
887 $uriMap{$prefix} = $uri;
888 } else {
889 $defaultPrefix = $uri;
890 }
891 };
892
893
894 #
895 # Remove a prefix from the prefix map.
896 #
897 $self->{REMOVEPREFIX} = sub {
898 my ($uri) = (@_);
899 if ($defaultPrefix eq $uri) {
900 $defaultPrefix = undef;
901 }
902 delete $prefixMap{$uri};
903 };
904
905
906 #
907 # Bless and return the object.
908 #
909 return bless $self, $class;
910}
911
912
913#
914# Add a preferred prefix for a namespace URI.
915#
916sub addPrefix {
917 my $self = shift;
918 return &{$self->{ADDPREFIX}};
919}
920
921
922#
923# Remove a preferred prefix for a namespace URI.
924#
925sub removePrefix {
926 my $self = shift;
927 return &{$self->{REMOVEPREFIX}};
928}
929
930
931#
932# Check names.
933#
934sub _checkNSNames {
935 my $names = $_[0];
936 my $i = 1;
937 my $name = $names->[0];
938
939 # Check the element name.
940 if (ref($name) eq 'ARRAY') {
941 if ($name->[1] =~ /:/) {
942 croak("Local part of element name '" .
943 $name->[1] .
944 "' contains a colon.");
945 }
946 } elsif ($name =~ /:/) {
947 croak("Element name '$name' contains a colon.");
948 }
949
950 # Check the attribute names.
951 while ($names->[$i]) {
952 my $name = $names->[$i];
953 if (ref($name) eq 'ARRAY') {
954 my $local = $name->[1];
955 if ($local =~ /:/) {
956 croak "Local part of attribute name '$local' contains a colon.";
957 }
958 } else {
959 if ($name =~ /^(xmlns|.*:)/) {
960 if ($name =~ /^xmlns/) {
961 croak "Attribute name '$name' begins with 'xmlns'";
962 } elsif ($name =~ /:/) {
963 croak "Attribute name '$name' contains ':'";
964 }
965 }
966 }
967 $i += 2;
968 }
969}
970
971
9721;
973__END__
974
975
976########################################################################
977# POD Documentation
978########################################################################
979
980=head1 NAME
981
982XML::Writer - Perl extension for writing XML documents.
983
984=head1 SYNOPSIS
985
986 use XML::Writer;
987 use IO;
988
989 my $output = new IO::File(">output.xml");
990
991 my $writer = new XML::Writer(OUTPUT => $output);
992 $writer->startTag("greeting",
993 "class" => "simple");
994 $writer->characters("Hello, world!");
995 $writer->endTag("greeting");
996 $writer->end();
997 $output->close();
998
999
1000=head1 DESCRIPTION
1001
1002XML::Writer is a helper module for Perl programs that write an XML
1003document. The module handles all escaping for attribute values and
1004character data and constructs different types of markup, such as tags,
1005comments, and processing instructions.
1006
1007By default, the module performs several well-formedness checks to
1008catch errors during output. This behaviour can be extremely useful
1009during development and debugging, but it can be turned off for
1010production-grade code.
1011
1012The module can operate either in regular mode in or Namespace
1013processing mode. In Namespace mode, the module will generate
1014Namespace Declarations itself, and will perform additional checks on
1015the output.
1016
1017Additional support is available for a simplified data mode with no
1018mixed content: newlines are automatically inserted around elements and
1019elements can optionally be indented based as their nesting level.
1020
1021
1022=head1 METHODS
1023
1024=head2 Writing XML
1025
1026=over 4
1027
1028=item new([$params])
1029
1030Create a new XML::Writer object:
1031
1032 my $writer = new XML::Writer(OUTPUT => $output, NEWLINES => 1);
1033
1034Arguments are an anonymous hash array of parameters:
1035
1036=over 4
1037
1038=item OUTPUT
1039
1040An object blessed into IO::Handle or one of its subclasses (such as
1041IO::File); if this parameter is not present, the module will write to
1042standard output.
1043
1044=item NAMESPACES
1045
1046A true (1) or false (0, undef) value; if this parameter is present and
1047its value is true, then the module will accept two-member array
1048reference in the place of element and attribute names, as in the
1049following example:
1050
1051 my $rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
1052 my $writer = new XML::Writer(NAMESPACES => 1);
1053 $writer->startTag([$rdfns, "Description"]);
1054
1055The first member of the array is a namespace URI, and the second part
1056is the local part of a qualified name. The module will automatically
1057generate appropriate namespace declarations and will replace the URI
1058part with a prefix.
1059
1060=item PREFIX_MAP
1061
1062A hash reference; if this parameter is present and the module is
1063performing namespace processing (see the NAMESPACES parameter), then
1064the module will use this hash to look up preferred prefixes for
1065namespace URIs:
1066
1067
1068 my $rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
1069 my $writer = new XML::Writer(NAMESPACES => 1,
1070 PREFIX_MAP => {$rdfns => 'rdf'});
1071
1072The keys in the hash table are namespace URIs, and the values are the
1073associated prefixes. If there is not a preferred prefix for the
1074namespace URI in this hash, then the module will automatically
1075generate prefixes of the form "__NS1", "__NS2", etc.
1076
1077To set the default namespace, use '' for the prefix.
1078
1079=item NEWLINES
1080
1081A true or false value; if this parameter is present and its value is
1082true, then the module will insert an extra newline before the closing
1083delimiter of start, end, and empty tags to guarantee that the document
1084does not end up as a single, long line. If the paramter is not
1085present, the module will not insert the newlines.
1086
1087=item UNSAFE
1088
1089A true or false value; if this parameter is present and its value is
1090true, then the module will skip most well-formedness error checking.
1091If the parameter is not present, the module will perform the
1092well-formedness error checking by default. Turn off error checking at
1093your own risk!
1094
1095=item DATA_MODE
1096
1097A true or false value; if this parameter is present and its value is
1098true, then the module will enter a special data mode, inserting
1099newlines automatically around elements and (unless UNSAFE is also
1100specified) reporting an error if any element has both characters and
1101elements as content.
1102
1103=item DATA_INDENT
1104
1105A numeric value; if this parameter is present, it represents the
1106indent step for elements in data mode (it will be ignored when not in
1107data mode).
1108
1109=back
1110
1111=item end()
1112
1113Finish creating an XML document. This method will check that the
1114document has exactly one document element, and that all start tags are
1115closed:
1116
1117 $writer->end();
1118
1119=item xmlDecl([$encoding, $standalone])
1120
1121Add an XML declaration to the beginning of an XML document. The
1122version will always be "1.0". If you provide a non-null encoding or
1123standalone argument, its value will appear in the declaration (and
1124non-null value for standalone except 'no' will automatically be
1125converted to 'yes').
1126
1127 $writer->xmlDecl("UTF-8");
1128
1129=item doctype($name, [$publicId, $systemId])
1130
1131Add a DOCTYPE declaration to an XML document. The declaration must
1132appear before the beginning of the root element. If you provide a
1133publicId, you must provide a systemId as well, but you may provide
1134just a system ID.
1135
1136 $writer->doctype("html");
1137
1138=item comment($text)
1139
1140Add a comment to an XML document. If the comment appears outside the
1141document element (either before the first start tag or after the last
1142end tag), the module will add a carriage return after it to improve
1143readability:
1144
1145 $writer->comment("This is a comment");
1146
1147=item pi($target [, $data])
1148
1149Add a processing instruction to an XML document:
1150
1151 $writer->pi('xml-stylesheet', 'href="style.css" type="text/css"');
1152
1153If the processing instruction appears outside the document element
1154(either before the first start tag or after the last end tag), the
1155module will add a carriage return after it to improve readability.
1156
1157The $target argument must be a single XML name. If you provide the
1158$data argument, the module will insert its contents following the
1159$target argument, separated by a single space.
1160
1161=item startTag($name [, $aname1 => $value1, ...])
1162
1163Add a start tag to an XML document. Any arguments after the element
1164name are assumed to be name/value pairs for attributes: the module
1165will escape all '&', '<', '>', and '"' characters in the attribute
1166values using the predefined XML entities:
1167
1168 $writer->startTag('doc', 'version' => '1.0',
1169 'status' => 'draft',
1170 'topic' => 'AT&T');
1171
1172All start tags must eventually have matching end tags.
1173
1174=item emptyTag($name [, $aname1 => $value1, ...])
1175
1176Add an empty tag to an XML document. Any arguments after the element
1177name are assumed to be name/value pairs for attributes (see startTag()
1178for details):
1179
1180 $writer->emptyTag('img', 'src' => 'portrait.jpg',
1181 'alt' => 'Portrait of Emma.');
1182
1183=item endTag([$name])
1184
1185Add an end tag to an XML document. The end tag must match the closest
1186open start tag, and there must be a matching and properly-nested end
1187tag for every start tag:
1188
1189 $writer->endTag('doc');
1190
1191If the $name argument is omitted, then the module will automatically
1192supply the name of the currently open element:
1193
1194 $writer->startTag('p');
1195 $writer->endTag();
1196
1197=item dataElement($name, $data [, $aname1 => $value1, ...])
1198
1199Print an entire element containing only character data. This is
1200equivalent to
1201
1202 $writer->startTag($name [, $aname1 => $value1, ...]);
1203 $writer->characters($data);
1204 $writer->endTag($name);
1205
1206=item characters($data)
1207
1208Add character data to an XML document. All '<', '>', and '&'
1209characters in the $data argument will automatically be escaped using
1210the predefined XML entities:
1211
1212 $writer->characters("Here is the formula: ");
1213 $writer->characters("a < 100 && a > 5");
1214
1215You may invoke this method only within the document element
1216(i.e. after the first start tag and before the last end tag).
1217
1218In data mode, you must not use this method to add whitespace between
1219elements.
1220
1221=item setOutput($output)
1222
1223Set the current output destination, as in the OUTPUT parameter for the
1224constructor.
1225
1226=item getOutput()
1227
1228Return the current output destination, as in the OUTPUT parameter for
1229the constructor.
1230
1231=item setDataMode($mode)
1232
1233Enable or disable data mode, as in the DATA_MODE parameter for the
1234constructor.
1235
1236=item getDataMode()
1237
1238Return the current data mode, as in the DATA_MODE parameter for the
1239constructor.
1240
1241=item setDataIndent($step)
1242
1243Set the indent step for data mode, as in the DATA_INDENT parameter for
1244the constructor.
1245
1246=item getDataIndent()
1247
1248Return the indent step for data mode, as in the DATA_INDENT parameter
1249for the constructor.
1250
1251
1252=back
1253
1254=head2 Querying XML
1255
1256=over 4
1257
1258=item in_element($name)
1259
1260Return a true value if the most recent open element matches $name:
1261
1262 if ($writer->in_element('dl')) {
1263 $writer->startTag('dt');
1264 } else {
1265 $writer->startTag('li');
1266 }
1267
1268=item within_element($name)
1269
1270Return a true value if any open elemnet matches $name:
1271
1272 if ($writer->within_element('body')) {
1273 $writer->startTag('h1');
1274 } else {
1275 $writer->startTag('title');
1276 }
1277
1278=item current_element()
1279
1280Return the name of the currently open element:
1281
1282 my $name = $writer->current_element();
1283
1284This is the equivalent of
1285
1286 my $name = $writer->ancestor(0);
1287
1288=item ancestor($n)
1289
1290Return the name of the nth ancestor, where $n=0 for the current open
1291element.
1292
1293=back
1294
1295
1296=head2 Additional Namespace Support
1297
1298WARNING: you must not use these methods while you are writing a
1299document, or the results will be unpredictable.
1300
1301=over 4
1302
1303=item addPrefix($uri, $prefix)
1304
1305Add a preferred mapping between a Namespace URI and a prefix. See
1306also the PREFIX_MAP constructor parameter.
1307
1308To set the default namespace, omit the $prefix parameter or set it to
1309''.
1310
1311=item removePrefix($uri)
1312
1313Remove a preferred mapping between a Namespace URI and a prefix.
1314
1315To set the default namespace, omit the $prefix parameter or set it to
1316''.
1317
1318=back
1319
1320
1321=head1 ERROR REPORTING
1322
1323With the default settings, the XML::Writer module can detect several
1324basic XML well-formedness errors:
1325
1326=over 4
1327
1328=item *
1329
1330Lack of a (top-level) document element, or multiple document elements.
1331
1332=item *
1333
1334Unclosed start tags.
1335
1336=item *
1337
1338Misplaced delimiters in the contents of processing instructions or
1339comments.
1340
1341=item *
1342
1343Misplaced or duplicate XML declaration(s).
1344
1345=item *
1346
1347Misplaced or duplicate DOCTYPE declaration(s).
1348
1349=item *
1350
1351Mismatch between the document type name in the DOCTYPE declaration and
1352the name of the document element.
1353
1354=item *
1355
1356Mismatched start and end tags.
1357
1358=item *
1359
1360Attempts to insert character data outside the document element.
1361
1362=item *
1363
1364Duplicate attributes with the same name.
1365
1366=back
1367
1368During Namespace processing, the module can detect the following
1369additional errors:
1370
1371=over 4
1372
1373=item *
1374
1375Attempts to use PI targets or element or attribute names containing a
1376colon.
1377
1378=item *
1379
1380Attempts to use attributes with names beginning "xmlns".
1381
1382=back
1383
1384To ensure full error detection, a program must also invoke the end
1385method when it has finished writing a document:
1386
1387 $writer->startTag('greeting');
1388 $writer->characters("Hello, world!");
1389 $writer->endTag('greeting');
1390 $writer->end();
1391
1392This error reporting can catch many hidden bugs in Perl programs that
1393create XML documents; however, if necessary, it can be turned off by
1394providing an UNSAFE parameter:
1395
1396 my $writer = new XML::Writer(OUTPUT => $output, UNSAFE => 1);
1397
1398
1399=head1 AUTHOR
1400
1401David Megginson, [email protected]
1402
1403=head1 SEE ALSO
1404
1405XML::Parser
1406
1407=cut
Note: See TracBrowser for help on using the repository browser.