Changeset 13170
- Timestamp:
- 2006-10-27T13:37:58+13:00 (17 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/docprint.pm
r8716 r13170 1 1 ########################################################################### 2 2 # 3 # docprint.pm 3 # docprint.pm -- 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the 6 6 # University of Waikato, New Zealand. 7 7 # 8 # Copyright (C) 1999 New Zealand Digital Library Project 8 # Copyright (C) 2006 New Zealand Digital Library Project 9 9 # 10 10 # This program is free software; you can redistribute it and/or modify … … 24 24 ########################################################################### 25 25 26 # This document processor prints a document out to STDOUT 27 26 # This is used to output an XML representation of a doc_obj - this will be 27 # GA format. 28 # This is used by GAPlugout, doc.pm and incremental_build 28 29 29 30 package docprint; 30 31 31 use arcinfo; 32 use docproc; 33 use util; 32 use strict; 34 33 34 sub get_section_xml { 35 36 my ($doc_obj, $section) = @_; 35 37 36 sub BEGIN { 37 @docprint::ISA = ('docproc'); 38 my $section_ptr = $doc_obj->_lookup_section ($section); 39 return "" unless defined $section_ptr; 40 41 my $all_text = "<Section>\n"; 42 $all_text .= " <Description>\n"; 43 44 # output metadata 45 foreach my $data (@{$section_ptr->{'metadata'}}) { 46 my $escaped_value = &escape_text($data->[1]); 47 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . 
"</Metadata>\n"; 48 } 49 50 $all_text .= " </Description>\n"; 51 52 # output the text 53 $all_text .= " <Content>"; 54 $all_text .= &escape_text($section_ptr->{'text'}); 55 $all_text .= "</Content>\n"; 56 57 # output all the subsections 58 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) { 59 $all_text .= &get_section_xml($doc_obj, "$section.$subsection"); 60 } 61 62 $all_text .= "</Section>\n"; 63 64 # make sure no nasty control characters have snuck through 65 # (XML::Parser will barf on anything it doesn't consider to be 66 # valid UTF-8 text, including things like \c@, \cC etc.) 67 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 68 69 return $all_text; 38 70 } 39 71 40 sub new { 41 my ($class) = @_; 42 my $self = new docproc (); 72 sub escape_text { 73 my ($text) = @_; 74 # special characters in the xml encoding 75 $text =~ s/&/&amp;/g; # this has to be first... 76 $text =~ s/</&lt;/g; 77 $text =~ s/>/&gt;/g; 78 $text =~ s/\"/&quot;/g; 43 79 44 return bless $self, $class; 80 return $text; 45 81 } 46 82 47 sub process { 48 my $self = shift (@_); 49 my ($doc_obj) = @_; 50 51 # add associated files as metadata to the document 52 my @assoc_files = (); 53 foreach my $assoc_file (@{$doc_obj->get_assoc_files()}) { 54 if (-e $assoc_file->[0]) { 55 $doc_obj->add_metadata ($doc_obj->get_top_section(), 56 "gsdlassocfile", 57 "$assoc_file->[1]:$assoc_file->[2]"); 58 } 59 } 60 61 # output document 62 $doc_obj->output_section(STDOUT, $doc_obj->get_top_section(), 63 undef, undef, 0); 64 } 65 66 67 83 1;
Note:
See TracChangeset
for help on using the changeset viewer.