source: main/trunk/package-kits/scripts/perllib/Greenstone/XML/Tidy.pm@ 29680

Last change on this file since 29680 was 29680, checked in by Jeremy Symon, 9 years ago

Fixed problems with XML parser introduced by the ordered hash change

File size: 6.1 KB
Line 
1package Greenstone::XML::Tidy;
2
3use strict;
4use warnings;
5use utf8;
6use XML::Parser;
7use POSIX 'isatty';
8use Hash::Ordered;
9use base 'Exporter';
10
11our $VERSION = 1.00;
12our @EXPORT = ( 'read_xml', 'write_xml' );
13
14# Reads the servlets partial XML file using XML::Parser,
15# Then converts the obtained data structure to a more readable one
16# Example conversion:
17=xml
18<servlet>
19 <servlet-name>library</servlet-name>
20 <description attribute="example">The standard gsdl3 library program</description>
21 <servlet-class>org.greenstone.gsdl3.LibraryServlet</servlet-class>
22 <init-param>
23 <param-name>library_name</param-name>
24 <param-value>library</param-value>
25 </init-param>
26 <init-param>
27 <param-name>site_name</param-name>
28 <param-value>localsite</param-value>
29 </init-param>
30</servlet>
31=cut
32=data_structure
33{
34 'servlet' => {
35 'servlet-class' => 'org.greenstone.gsdl3.LibraryServlet',
36 'servlet-name' => 'library',
37 'description' => {
38 '.value' => 'The standard gsdl3 library program',
39 '.attr' => {
40 'attribute' => 'example'
41 }
42 },
43 'init-param' => [
44 {
45 'param-value' => 'library',
46 'param-name' => 'library_name'
47 },
48 {
49 'param-name' => 'site_name',
50 'param-value' => 'localsite'
51 }
52 ]
53 }
54}
55=cut
56sub read_xml {
57 my $file = shift;
58 my $xml;
59 if ($file eq '-') {
60 if (isatty *STDIN) {
61 print STDERR "Reading XML from STDIN. Press ^D to end\n";
62 }
63 local $/ = undef;
64 $xml = '<__root__>' . <STDIN> . '</__root__>';
65 } else {
66 $xml = '<!DOCTYPE doc [<!ENTITY real_doc SYSTEM "' . $file . '">]>
67 <__root__>&real_doc;</__root__>';
68 }
69 # Parse the data using XML::Parser
70 my $data = new XML::Parser (Style => 'Tree')->parse ($xml);
71
72 sub tidy {
73 my $hash = Hash::Ordered->new;
74 # If the element has attributes, add them to the hash
75 my $attr = shift;
76 if (scalar keys %$attr > 0) {
77 my $attr_ordered = Hash::Ordered->new;
78 for my $key (keys %$attr) {
79 $attr_ordered->set ($key => $attr->{$key});
80 }
81 $hash->set ('.attr' => $attr_ordered);
82 }
83 # Read any child elements or text value
84 while (@_) {
85 my $element = shift;
86 my $value = shift;
87 if ($element eq 0) {
88 unless ($value =~ /^\s*$/) {
89 my $existing = $hash->get ('.value');
90 defined $existing and ($value .= $existing);
91 $hash->set ('.value' => $value);
92 }
93 } else {
94 # If there is more than one of a single tupe of child element,
95 # That child element must become an array
96 if ($hash->exists ($element)) {
97 my $existing = $hash->get ($element);
98 unless (ref $existing eq 'ARRAY') {
99 $hash->set ($element => [ $existing ]);
100 }
101 push @{$hash->get ($element)}, tidy (@{$value});
102 } else {
103 $hash->set ($element => tidy (@{$value}));
104 }
105 }
106 }
107 # If the element only has a value, it can become a scalar
108 if (scalar $hash->keys == 1 and $hash->exists ('.value')) {
109 return $hash->get ('.value');
110 }
111 return $hash;
112 }
113 return tidy (@{@$data[1]});
114}
115
116# Writes a data structure to an XML file
117sub write_xml {
118 my ($hash, $file) = @_;
119 my $FH;
120 if ($file ne '-') {
121 unless (open $FH, '>:utf8', $file) {
122 print STDERR "Failed to open output file '$file': $!\n";
123 return 0;
124 }
125 select $FH;
126 }
127 sub escape {
128 my @r;
129 for (@_) {
130 my $str = ''. $_;
131 $str =~ s/(['\\])/\\$1'/g;
132 push @r, $str;
133 }
134 return @r;
135 }
136 sub open_tag {
137 my ($indent, $tag, $attr) = @_;
138 print $indent, "<", $tag;
139 if (defined $attr) {
140 my $iter = $attr->iterator;
141 while (my ($key, $value) = $iter->()) {
142 print " ", $key, '="', escape $value, '"';
143 }
144 }
145 print ">";
146 }
147 sub write_element {
148 my ($indent, $hash) = @_;
149 for my $key ($hash->keys) {
150 $key eq '.attr' && next;
151 my $val = $hash->get ($key);
152 $key eq '.value' && do {
153 print $val, "\n";
154 next;
155 };
156 for (ref $val) {
157 /^ARRAY$/ && do {
158 for my $element (@$val) {
159 open_tag ($indent, $key, $element->get ('.attr'));
160 for (ref $element) {
161 /^Hash::Ordered$/ && do {
162 print "\n";
163 write_element (" $indent", $element);
164 print $indent, "</", $key, ">\n";
165 last;
166 };
167 print $element, "</", $key, ">\n";
168 }
169 }
170 last;
171 };
172 /^Hash::Ordered$/ && do {
173 open_tag ($indent, $key, $val->get ('.attr'));
174 # If the element only has a value with attributes,
175 # it can be formatted on one line
176 if (scalar $val->keys == 2 and $val->exists ('.value')) {
177 print $val->get ('.value'), "</", $key, ">\n";
178 } else {
179 print "\n";
180 write_element (" $indent", $val);
181 print $indent, "</", $key, ">\n";
182 }
183 last;
184 };
185 open_tag ($indent, $key);
186 print $val;
187 print "</", $key, ">\n";
188 }
189 }
190 }
191 write_element ("", $hash);
192 if ($file ne '-') {
193 select STDOUT;
194 close $FH;
195 }
196 return 1;
197}
198
1991;
Note: See TracBrowser for help on using the repository browser.