Changeset 2810


Ignore:
Timestamp: 2001-10-31T19:37:58+13:00 (22 years ago)
Author: sjboddie
Message:

Created GAPlug (and XMLPlug base class) to replace the old GMLPlug.
Greenstone archives will now be stored as proper XML documents (with .xml
file extension) instead of the old .gml files.

Location:
trunk/gsdl/perllib
Files:
2 added
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/doc.pm

    r2484 r2810  
    155155}
    156156
    157 
    158 sub buffer_section_gml {
    159     my $self = shift (@_);
    160     my ($section, $suppress_subject_info) = @_;
    161 
    162     $suppress_subject_info = 0 unless defined $suppress_subject_info;
    163     my ($all_text,$data, $subsection);
    164    
     157sub buffer_section_xml {
     158    my $self = shift (@_);
     159    my ($section) = @_;
     160
    165161    my $section_ptr = $self->_lookup_section ($section);
    166     my ($section_num) = $section =~ /(\d+)$/;
    167    
    168162    return "" unless defined $section_ptr;
    169163
    170     # output the section header (including the section number
    171     # and metadata)
    172 
    173     $all_text = "<gsdlsection";
    174     $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
    175     foreach $data (@{$section_ptr->{'metadata'}}) {
    176     $all_text .= " $data->[0]=\"" . &_escape_text($data->[1]) . "\""
    177         unless $suppress_subject_info && $data->[0] eq "Subject";
    178     }
    179     $all_text .= ">";
     164    my $all_text = "<Section>\n";
     165    $all_text .= "  <Description>\n";
     166   
     167    # output metadata
     168    foreach my $data (@{$section_ptr->{'metadata'}}) {
     169    my $escaped_value = &_escape_text($data->[1]);
     170    $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
     171    }
     172
     173    $all_text .= "  </Description>\n";
    180174
    181175    # output the text
     176    $all_text .= "  <Content>\n";
    182177    $all_text .= &_escape_text($section_ptr->{'text'});
    183 
     178    $all_text .= "  </Content>\n";
     179   
    184180    # output all the subsections
    185     foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
    186     $all_text .= $self->buffer_section_gml("$section.$subsection",
    187                            $suppress_subject_info);
    188     }
    189    
    190     # output the closing tag
    191     $all_text .=  "</gsdlsection>\n";
     181    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
     182    $all_text .= $self->buffer_section_xml("$section.$subsection");
     183    }
     184   
     185    $all_text .=  "</Section>\n";
    192186
    193187    return $all_text;
    194188}
    195189
    196 
    197 sub buffer_section_xml {
    198     my $self = shift (@_);
    199     my ($section, $dtd_metadata, $suppress_subject_info) = @_;
    200 
    201     $suppress_subject_info = 0 unless defined $suppress_subject_info;
    202     my ($all_text, $data, $subsection);
    203    
    204     my $section_ptr = $self->_lookup_section ($section);
    205     my ($section_num) = $section =~ /(\d+)$/;
    206    
    207     return "" unless defined $section_ptr;
    208 
    209     # output the section header (including the section number
    210     # and metadata)
    211 
    212     $all_text .= "<gsdlsection";
    213     $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
    214     $all_text .= ">\n";
    215    
    216     $all_text .= "  <metadata>\n";
    217    
    218     # output metadata
    219     foreach $data (@{$section_ptr->{'metadata'}}) {
    220     my $tag_name = $data->[0];
    221 
    222     # a tagname beginning with '/' (like </srclink>) will cause problems
    223     # so we'll escape any leading '/'
    224     $tag_name =~ s/^\//&\#47;/;
    225 
    226     my $tag_value = &_escape_text($data->[1]);
    227 
    228     unless ($suppress_subject_info && $tag_name eq "Subject")
    229     {
    230         if (defined $dtd_metadata)
    231         {
    232         $dtd_metadata->{$tag_name}++;
    233         }
    234 
    235         $all_text .= "    <$tag_name>$tag_value</$tag_name>\n";
    236     }
    237     }
    238 
    239     $all_text .= "  </metadata>\n";
    240 
    241     # output the text
    242     $all_text .= &_escape_text($section_ptr->{'text'});
    243 
    244     # output all the subsections
    245     foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
    246     $all_text .= $self->buffer_section_xml("$section.$subsection",
    247                            $dtd_metadata,
    248                            $suppress_subject_info);
    249     }
    250    
    251     # output the closing tag
    252     $all_text .=  "</gsdlsection>\n";
    253 
    254     return $all_text;
    255 }
    256 
    257190sub output_section {
    258191    my $self = shift (@_);
    259     my ($handle, $section, $colname, $dtd_metadata,
    260     $suppress_subject_info) = @_;
    261 
    262     my $all_text = $self->buffer_section_xml($section, $dtd_metadata,
    263                          $suppress_subject_info);
    264 
    265     # xml header   
    266     if (defined $collection)
    267     {
    268     my $xml_head
    269         = '<? xml version="1.0" standalone="no" encoding="UTF-8" ?>';
    270     $xml_head .= "\n<!DOCTYPE gsdl:$colname SYSTEM \"$colname.dtd\">\n";
    271     $all_text = $xml_head.$all_text;
    272     }
    273 
    274     print $handle $all_text;
     192    my ($handle, $section) = @_;
     193
     194    print $handle $self->buffer_section_xml($section);
    275195}
    276196
  • trunk/gsdl/perllib/docsave.pm

    r2267 r2810  
    2525
    2626# This document processor saves a document in the
    27 # archives directory of a collection
     27# archives directory of a collection (as xml)
    2828
    2929
     
    5353    $self->{'groupsize'} = $groupsize;
    5454    $self->{'gs_count'} = 0;
    55 
    56     # keep an associate array of all metavalues used by collection to
    57     # help generate the XML DTD
    58     $self->{'dtd_metadata'} = {};
    5955
    6056    $self->{'outhandle'} = STDERR;
     
    9389   
    9490    } else {
    95     # groupsize is 1 (i.e. one document per GML file) so sortmeta
     91    # groupsize is 1 (i.e. one document per XML file) so sortmeta
    9692    # may be used
    9793   
     
    107103   
    108104    my $doc_file
    109         = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.gml");
    110     my $short_doc_file = &util::filename_cat ($doc_dir, "doc.gml");
     105        = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml");
     106    my $short_doc_file = &util::filename_cat ($doc_dir, "doc.xml");
    111107   
    112108    if (!open (OUTDOC, ">$doc_file")) {
     
    116112
    117113    # save this document
    118     $doc_obj->output_section('docsave::OUTDOC',
    119                  $doc_obj->get_top_section(),
    120                  $self->{'collection'},
    121                  $self->{'dtd_metadata'},0);
    122 
     114    $self->output_xml_header('docsave::OUTDOC');
     115    $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section());
     116    $self->output_xml_footer('docsave::OUTDOC');
    123117    close OUTDOC;
    124118
     
    172166        # only if opening new file
    173167        my $doc_file
    174         = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.gml");
    175         my $short_doc_file = &util::filename_cat ($doc_dir, "doc.gml");
     168        = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml");
     169        my $short_doc_file = &util::filename_cat ($doc_dir, "doc.xml");
    176170       
    177171        if ($gs_count>0)
     
    187181        $self->{'gs_short_filename'} = $short_doc_file;
    188182        $self->{'gs_OID'} = $OID;
     183
     184        $self->output_xml_header('docsave::OUTDOC');
    189185    }
    190186    }
    191187
    192188    # save this document
    193     $doc_obj->output_section('docsave::OUTDOC',
    194                  $doc_obj->get_top_section(),
    195                  $self->{'collection'},
    196                  $self->{'dtd_metadata'},0);
     189    $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section());
    197190
    198191    $self->{'gs_count'}++;
     
    210203    # same one.
    211204    $doc_dir = $doc_info->[0];
    212     $doc_dir =~ s/\/?doc\.gml(\.gz)?$//;
     205    $doc_dir =~ s/\/?doc\.xml(\.gz)?$//;
    213206    } else {
    214207    # have to get a new document directory
     
    264257{
    265258    my ($self) = @_;
    266    
     259
     260    $self->output_xml_footer('docsave::OUTDOC');   
    267261    close OUTDOC;
    268262
     
    288282}
    289283
     284sub output_xml_header {
     285    my $self = shift (@_);
     286    my ($handle) = @_;
     287
     288    print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
     289    print $handle '<!DOCTYPE GreenstoneArchive SYSTEM ' .
     290    '"http://greenstone.org/dtd/GreenstoneArchive/1.0/GreenstoneArchive.dtd">' . "\n";
     291    print $handle '<GreenstoneArchive>' . "\n";
     292}
     293
     294sub output_xml_footer {
     295    my $self = shift (@_);
     296    my ($handle) = @_;
     297
     298    print $handle '</GreenstoneArchive>' . "\n";
     299}
     300
    2903011;
Note: See TracChangeset for help on using the changeset viewer.