source: gsdl/trunk/perllib/docprint.pm@ 19214

Last change on this file since 19214 was 19214, checked in by kjdon, 15 years ago

double && seemed to cause trouble for diego

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 2.6 KB
RevLine 
[782]1###########################################################################
2#
[13170]3# docprint.pm --
[782]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
[13170]8# Copyright (C) 2006 New Zealand Digital Library Project
[782]9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
[13170]26# This is used to output an XML representation of a doc_obj - this will be
[17747]27# Greenstone XML format.
28# This is used by GreenstoneXMLPlugout and doc.pm
[782]29
30package docprint;
31
[13170]32use strict;
[782]33
[13170]34sub get_section_xml {
35
36 my ($doc_obj, $section) = @_;
[782]37
[13170]38 my $section_ptr = $doc_obj->_lookup_section ($section);
39 return "" unless defined $section_ptr;
[782]40
[13170]41 my $all_text = "<Section>\n";
42 $all_text .= " <Description>\n";
43
44 # output metadata
45 foreach my $data (@{$section_ptr->{'metadata'}}) {
46 my $escaped_value = &escape_text($data->[1]);
47 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
48 }
[782]49
[13170]50 $all_text .= " </Description>\n";
[782]51
[13170]52 # output the text
53 $all_text .= " <Content>";
54 $all_text .= &escape_text($section_ptr->{'text'});
55 $all_text .= "</Content>\n";
56
57 # output all the subsections
58 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
59 $all_text .= &get_section_xml($doc_obj, "$section.$subsection");
[782]60 }
61
[13170]62 $all_text .= "</Section>\n";
63
64 # make sure no nasty control characters have snuck through
65 # (XML::Parser will barf on anything it doesn't consider to be
66 # valid UTF-8 text, including things like \c@, \cC etc.)
67 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
68
69 return $all_text;
[782]70}
71
[13170]72sub escape_text {
73 my ($text) = @_;
74 # special characters in the xml encoding
[19214]75 $text =~ s/&&/& &/g;
[13170]76 $text =~ s/&/&amp;/g; # this has to be first...
77 $text =~ s/</&lt;/g;
78 $text =~ s/>/&gt;/g;
79 $text =~ s/\"/&quot;/g;
[782]80
[13170]81 return $text;
82}
83
[782]841;
Note: See TracBrowser for help on using the repository browser.