package Greenstone::XML::Tidy;
use strict;
use warnings;
use utf8;
use XML::Parser;
use POSIX 'isatty';
use base 'Exporter';
our $VERSION = 1.00;
our @EXPORT = ( 'read_xml', 'write_xml' );
# Reads the servlets partial XML file using XML::Parser,
# Then converts the obtained data structure to a more readable one
# Example conversion:
=xml
library
The standard gsdl3 library program
org.greenstone.gsdl3.LibraryServlet
library_name
library
site_name
localsite
=cut
=data_structure
{
'servlet' => {
'servlet-class' => 'org.greenstone.gsdl3.LibraryServlet',
'servlet-name' => 'library',
'description' => {
'.value' => 'The standard gsdl3 library program',
'.attr' => {
'attribute' => 'example'
}
},
'init-param' => [
{
'param-value' => 'library',
'param-name' => 'library_name'
},
{
'param-name' => 'site_name',
'param-value' => 'localsite'
}
]
}
}
=cut
sub read_xml {
my $file = shift;
my $xml;
if ($file eq '-') {
if (isatty *STDIN) {
print STDERR "Reading XML from STDIN. Press ^D to end\n";
}
local $/ = undef;
$xml = '<__root__>' . . '';
} else {
$xml = ']>
<__root__>&real_doc;';
}
# Parse the data using XML::Parser
my $data = new XML::Parser (Style => 'Tree')->parse ($xml);
sub tidy {
my %hash;
# If the element has attributes, add them to the hash
my $attr = shift;
if (scalar keys %$attr > 0) {
$hash{'.attr'} = $attr;
}
# Read any child elements or text value
while (@_) {
my $element = shift;
my $value = shift;
if ($element eq 0) {
$value =~ /^\s*$/ || ($hash{'.value'} .= $value);
} else {
# If there is more than one of a single tupe of child element,
# That child element must become an array
if (defined $hash{$element}) {
unless (ref $hash{$element} eq 'ARRAY') {
$hash{$element} = [ $hash{$element} ];
}
push @{$hash{$element}}, tidy (@{$value});
} else {
$hash{$element} = tidy (@{$value});
}
}
}
# If the element only has a value, it can become a scalar
if (scalar keys %hash == 1 and defined $hash{'.value'}) {
return $hash{'.value'};
}
return \%hash;
}
return tidy (@{@$data[1]});
}
# Writes a data structure to an XML file
sub write_xml {
my ($hash, $file) = @_;
my $FH;
if ($file ne '-') {
unless (open $FH, '>:utf8', $file) {
print STDERR "Failed to open output file '$file': $!\n";
return 0;
}
select $FH;
}
sub open_tag {
my ($indent, $tag, $attr) = @_;
print $indent, "<", $tag;
if (defined $attr) {
for my $key (sort keys %$attr) {
print " ", $key, '="', $attr->{$key}, '"';
}
}
print ">";
}
sub write_element {
my ($indent, $hash) = @_;
for my $key (sort keys %$hash) {
$key eq '.attr' && next;
my $val = $hash->{$key};
$key eq '.value' && do {
print $val, "\n";
next;
};
for (ref $val) {
/^ARRAY$/ && do {
for my $element (@$val) {
open_tag ($indent, $key, $element->{'.attr'});
for (ref $element) {
/^HASH$/ && do {
print "\n";
write_element (" $indent", $element);
print $indent, "", $key, ">\n";
last;
};
print $element, "", $key, ">\n";
}
}
last;
};
/^HASH$/ && do {
open_tag ($indent, $key, $val->{'.attr'});
# If the element only has a value with attributes,
# it can be formatted on one line
if (scalar keys %$val == 2 and defined $val->{'.value'}) {
print $val->{'.value'}, "", $key, ">\n";
} else {
print "\n";
write_element (" $indent", $val);
print $indent, "", $key, ">\n";
}
last;
};
open_tag ($indent, $key);
print $val;
print "", $key, ">\n";
}
}
}
write_element ("", $hash);
if ($file ne '-') {
select STDOUT;
close $FH;
}
return 1;
}
1;