source: main/trunk/package-kits/scripts/perllib/Greenstone/XML/Tidy.pm@ 29679

Last change on this file since 29679 was 29679, checked in by Jeremy Symon, 9 years ago

Switch to an Ordered Hash to stop the XML files from being scrambled

File size: 5.7 KB
Line 
1package Greenstone::XML::Tidy;
2
3use strict;
4use warnings;
5use utf8;
6use XML::Parser;
7use POSIX 'isatty';
8use Hash::Ordered;
9use base 'Exporter';
10
11our $VERSION = 1.00;
12our @EXPORT = ( 'read_xml', 'write_xml' );
13
14# Reads the servlets partial XML file using XML::Parser,
15# Then converts the obtained data structure to a more readable one
16# Example conversion:
17=xml
18<servlet>
19 <servlet-name>library</servlet-name>
20 <description attribute="example">The standard gsdl3 library program</description>
21 <servlet-class>org.greenstone.gsdl3.LibraryServlet</servlet-class>
22 <init-param>
23 <param-name>library_name</param-name>
24 <param-value>library</param-value>
25 </init-param>
26 <init-param>
27 <param-name>site_name</param-name>
28 <param-value>localsite</param-value>
29 </init-param>
30</servlet>
31=cut
32=data_structure
33{
34 'servlet' => {
35 'servlet-class' => 'org.greenstone.gsdl3.LibraryServlet',
36 'servlet-name' => 'library',
37 'description' => {
38 '.value' => 'The standard gsdl3 library program',
39 '.attr' => {
40 'attribute' => 'example'
41 }
42 },
43 'init-param' => [
44 {
45 'param-value' => 'library',
46 'param-name' => 'library_name'
47 },
48 {
49 'param-name' => 'site_name',
50 'param-value' => 'localsite'
51 }
52 ]
53 }
54}
55=cut
56sub read_xml {
57 my $file = shift;
58 my $xml;
59 if ($file eq '-') {
60 if (isatty *STDIN) {
61 print STDERR "Reading XML from STDIN. Press ^D to end\n";
62 }
63 local $/ = undef;
64 $xml = '<__root__>' . <STDIN> . '</__root__>';
65 } else {
66 $xml = '<!DOCTYPE doc [<!ENTITY real_doc SYSTEM "' . $file . '">]>
67 <__root__>&real_doc;</__root__>';
68 }
69 # Parse the data using XML::Parser
70 my $data = new XML::Parser (Style => 'Tree')->parse ($xml);
71
72 sub tidy {
73 my $hash = Hash::Ordered->new;
74 # If the element has attributes, add them to the hash
75 my $attr = shift;
76 if (scalar keys %$attr > 0) {
77 $hash->set ('.attr' => Hash::Ordered->new ($attr));
78 }
79 # Read any child elements or text value
80 while (@_) {
81 my $element = shift;
82 my $value = shift;
83 if ($element eq 0) {
84 unless ($value =~ /^\s*$/) {
85 my $existing = $hash->get ('.value');
86 defined $existing and ($value .= $existing);
87 $hash->set ('.value' => $value);
88 }
89 } else {
90 # If there is more than one of a single tupe of child element,
91 # That child element must become an array
92 if ($hash->exists ($element)) {
93 my $existing = $hash->get ($element);
94 unless (ref $existing eq 'ARRAY') {
95 $hash->set ($element => [ $existing ]);
96 }
97 push @{$hash->get ($element)}, tidy (@{$value});
98 } else {
99 $hash->set ($element => tidy (@{$value}));
100 }
101 }
102 }
103 # If the element only has a value, it can become a scalar
104 if (scalar $hash->keys == 1 and $hash->exists ('.value')) {
105 return $hash->get ('.value');
106 }
107 return $hash;
108 }
109 return tidy (@{@$data[1]});
110}
111
112# Writes a data structure to an XML file
113sub write_xml {
114 my ($hash, $file) = @_;
115 my $FH;
116 if ($file ne '-') {
117 unless (open $FH, '>:utf8', $file) {
118 print STDERR "Failed to open output file '$file': $!\n";
119 return 0;
120 }
121 select $FH;
122 }
123 sub open_tag {
124 my ($indent, $tag, $attr) = @_;
125 print $indent, "<", $tag;
126 if (defined $attr) {
127 for my $key ($attr->keys) {
128 print " ", $key, '="', $attr->get ($key), '"';
129 }
130 }
131 print ">";
132 }
133 sub write_element {
134 my ($indent, $hash) = @_;
135 for my $key ($hash->keys) {
136 $key eq '.attr' && next;
137 my $val = $hash->get ($key);
138 $key eq '.value' && do {
139 print $val, "\n";
140 next;
141 };
142 for (ref $val) {
143 /^ARRAY$/ && do {
144 for my $element (@$val) {
145 open_tag ($indent, $key, $element->get ('.attr'));
146 for (ref $element) {
147 /^Hash::Ordered$/ && do {
148 print "\n";
149 write_element (" $indent", $element);
150 print $indent, "</", $key, ">\n";
151 last;
152 };
153 print $element, "</", $key, ">\n";
154 }
155 }
156 last;
157 };
158 /^Hash::Ordered$/ && do {
159 open_tag ($indent, $key, $val->get ('.attr'));
160 # If the element only has a value with attributes,
161 # it can be formatted on one line
162 if (scalar keys %$val == 2 and $val->exists ('.value')) {
163 print $val->get ('.value'), "</", $key, ">\n";
164 } else {
165 print "\n";
166 write_element (" $indent", $val);
167 print $indent, "</", $key, ">\n";
168 }
169 last;
170 };
171 open_tag ($indent, $key);
172 print $val;
173 print "</", $key, ">\n";
174 }
175 }
176 }
177 write_element ("", $hash);
178 if ($file ne '-') {
179 select STDOUT;
180 close $FH;
181 }
182 return 1;
183}
184
1851;
Note: See TracBrowser for help on using the repository browser.