source: main/trunk/package-kits/scripts/perllib/Greenstone/XML/Tidy.pm@ 29678

Last change on this file since 29678 was 29678, checked in by Jeremy Symon, 9 years ago

Moved xml i/o handling to the module for simplicity of use

File size: 5.4 KB
RevLine 
[29677]1package Greenstone::XML::Tidy;
[29673]2
3use strict;
4use warnings;
5use utf8;
6use XML::Parser;
[29678]7use POSIX 'isatty';
[29673]8use base 'Exporter';
9
10our $VERSION = 1.00;
11our @EXPORT = ( 'read_xml', 'write_xml' );
12
13# Reads the servlets partial XML file using XML::Parser,
14# Then converts the obtained data structure to a more readable one
15# Example conversion:
16=xml
17<servlet>
18 <servlet-name>library</servlet-name>
19 <description attribute="example">The standard gsdl3 library program</description>
20 <servlet-class>org.greenstone.gsdl3.LibraryServlet</servlet-class>
21 <init-param>
22 <param-name>library_name</param-name>
23 <param-value>library</param-value>
24 </init-param>
25 <init-param>
26 <param-name>site_name</param-name>
27 <param-value>localsite</param-value>
28 </init-param>
29</servlet>
30=cut
31=data_structure
32{
33 'servlet' => {
34 'servlet-class' => 'org.greenstone.gsdl3.LibraryServlet',
35 'servlet-name' => 'library',
36 'description' => {
37 '.value' => 'The standard gsdl3 library program',
38 '.attr' => {
39 'attribute' => 'example'
40 }
41 },
42 'init-param' => [
43 {
44 'param-value' => 'library',
45 'param-name' => 'library_name'
46 },
47 {
48 'param-name' => 'site_name',
49 'param-value' => 'localsite'
50 }
51 ]
52 }
53}
54=cut
55sub read_xml {
[29678]56 my $file = shift;
57 my $xml;
58 if ($file eq '-') {
59 if (isatty *STDIN) {
60 print STDERR "Reading XML from STDIN. Press ^D to end\n";
61 }
62 local $/ = undef;
63 $xml = '<__root__>' . <STDIN> . '</__root__>';
64 } else {
65 $xml = '<!DOCTYPE doc [<!ENTITY real_doc SYSTEM "' . $file . '">]>
66 <__root__>&real_doc;</__root__>';
67 }
[29673]68 # Parse the data using XML::Parser
69 my $data = new XML::Parser (Style => 'Tree')->parse ($xml);
70
71 sub tidy {
72 my %hash;
73 # If the element has attributes, add them to the hash
74 my $attr = shift;
75 if (scalar keys %$attr > 0) {
76 $hash{'.attr'} = $attr;
77 }
78 # Read any child elements or text value
79 while (@_) {
80 my $element = shift;
81 my $value = shift;
82 if ($element eq 0) {
83 $value =~ /^\s*$/ || ($hash{'.value'} .= $value);
84 } else {
85 # If there is more than one of a single tupe of child element,
86 # That child element must become an array
87 if (defined $hash{$element}) {
88 unless (ref $hash{$element} eq 'ARRAY') {
89 $hash{$element} = [ $hash{$element} ];
90 }
91 push @{$hash{$element}}, tidy (@{$value});
92 } else {
93 $hash{$element} = tidy (@{$value});
94 }
95 }
96 }
97 # If the element only has a value, it can become a scalar
98 if (scalar keys %hash == 1 and defined $hash{'.value'}) {
99 return $hash{'.value'};
100 }
101 return \%hash;
102 }
103 return tidy (@{@$data[1]});
104}
105
106# Writes a data structure to an XML file
107sub write_xml {
[29678]108 my ($hash, $file) = @_;
109 my $FH;
110 if ($file ne '-') {
111 unless (open $FH, '>:utf8', $file) {
112 print STDERR "Failed to open output file '$file': $!\n";
113 return 0;
114 }
115 select $FH;
116 }
[29673]117 sub open_tag {
118 my ($indent, $tag, $attr) = @_;
119 print $indent, "<", $tag;
120 if (defined $attr) {
121 for my $key (sort keys %$attr) {
122 print " ", $key, '="', $attr->{$key}, '"';
123 }
124 }
125 print ">";
126 }
127 sub write_element {
128 my ($indent, $hash) = @_;
129 for my $key (sort keys %$hash) {
130 $key eq '.attr' && next;
131 my $val = $hash->{$key};
132 $key eq '.value' && do {
133 print $val, "\n";
134 next;
135 };
136 for (ref $val) {
137 /^ARRAY$/ && do {
138 for my $element (@$val) {
139 open_tag ($indent, $key, $element->{'.attr'});
140 for (ref $element) {
141 /^HASH$/ && do {
142 print "\n";
143 write_element (" $indent", $element);
144 print $indent, "</", $key, ">\n";
145 last;
146 };
147 print $element, "</", $key, ">\n";
148 }
149 }
150 last;
151 };
152 /^HASH$/ && do {
153 open_tag ($indent, $key, $val->{'.attr'});
154 # If the element only has a value with attributes,
155 # it can be formatted on one line
156 if (scalar keys %$val == 2 and defined $val->{'.value'}) {
157 print $val->{'.value'}, "</", $key, ">\n";
158 } else {
159 print "\n";
160 write_element (" $indent", $val);
161 print $indent, "</", $key, ">\n";
162 }
163 last;
164 };
165 open_tag ($indent, $key);
166 print $val;
167 print "</", $key, ">\n";
168 }
169 }
170 }
171 write_element ("", $hash);
[29678]172 if ($file ne '-') {
173 select STDOUT;
174 close $FH;
175 }
176 return 1;
[29673]177}
[29678]178
1791;
Note: See TracBrowser for help on using the repository browser.