Changeset 13170


Ignore:
Timestamp:
2006-10-27T13:37:58+13:00 (17 years ago)
Author:
kjdon
Message:

docprint is no longer a docproc object. It is now a Perl module that outputs a (GA) XML representation of a section of a doc obj. It also provides an escape_text function.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/docprint.pm

    r8716 r13170  
    11###########################################################################
    22#
    3 # docprint.pm
     3# docprint.pm --
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
    66# University of Waikato, New Zealand.
    77#
    8 # Copyright (C) 1999 New Zealand Digital Library Project
     8# Copyright (C) 2006 New Zealand Digital Library Project
    99#
    1010# This program is free software; you can redistribute it and/or modify
     
    2424###########################################################################
    2525
    26 # This document processor prints a document out to STDOUT
    27 
     26# This is used to output an XML representation of a doc_obj - this will be
     27# GA format.
     28# This is used by GAPlugout, doc.pm and incremental_build
    2829
    2930package docprint;
    3031
    31 use arcinfo;
    32 use docproc;
    33 use util;
     32use strict;
    3433
     34sub get_section_xml {
     35   
     36    my ($doc_obj, $section) = @_;
    3537
    36 sub BEGIN {
    37     @docprint::ISA = ('docproc');
     38    my $section_ptr = $doc_obj->_lookup_section ($section);
     39    return "" unless defined $section_ptr;
     40
     41    my $all_text = "<Section>\n";
     42    $all_text .= "  <Description>\n";
     43   
     44    # output metadata
     45    foreach my $data (@{$section_ptr->{'metadata'}}) {
     46    my $escaped_value = &escape_text($data->[1]);
     47    $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
     48    }
     49
     50    $all_text .= "  </Description>\n";
     51
     52    # output the text
     53    $all_text .= "  <Content>";
     54    $all_text .= &escape_text($section_ptr->{'text'});
     55    $all_text .= "</Content>\n";
     56   
     57    # output all the subsections
     58    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
     59    $all_text .= &get_section_xml($doc_obj, "$section.$subsection");
     60    }
     61   
     62    $all_text .=  "</Section>\n";
     63
     64    # make sure no nasty control characters have snuck through
     65    # (XML::Parser will barf on anything it doesn't consider to be
     66    # valid UTF-8 text, including things like \c@, \cC etc.)
     67    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
     68
     69    return $all_text;
    3870}
    3971
    40 sub new {
    41     my ($class) = @_;
    42     my $self = new docproc ();
     72sub escape_text {
     73    my ($text) = @_;
     74    # special characters in the xml encoding
     75    $text =~ s/&/&amp;/g; # this has to be first...
     76    $text =~ s/</&lt;/g;
     77    $text =~ s/>/&gt;/g;
     78    $text =~ s/\"/&quot;/g;
    4379
    44     return bless $self, $class;
     80    return $text;
    4581}
    4682
    47 sub process {
    48     my $self = shift (@_);
    49     my ($doc_obj) = @_;
    50 
    51     # add associated files as metadata to the document
    52     my @assoc_files = ();
    53     foreach my $assoc_file (@{$doc_obj->get_assoc_files()}) {
    54     if (-e $assoc_file->[0]) {
    55         $doc_obj->add_metadata ($doc_obj->get_top_section(),
    56                     "gsdlassocfile",
    57                     "$assoc_file->[1]:$assoc_file->[2]");
    58     }
    59     }
    60    
    61     # output document
    62     $doc_obj->output_section(STDOUT, $doc_obj->get_top_section(),
    63                  undef, undef, 0);
    64 }
    65 
    66 
    67831;
Note: See TracChangeset for help on using the changeset viewer.