Changeset 2267


Ignore:
Timestamp:
2001-04-03T14:33:03+12:00 (23 years ago)
Author:
davidb
Message:

GML file syntax altered to be XML compliant. This basically meant
turning attribute lists of metadata names (which in Greenstone can
appear multiple times within a tag) into tag names themselves, which
are then explicitly stated in a <metadata>...</metadata> block.

Newly built collections will use the new syntactic form; however, the
GMLPlug file is backwards compatible and so will still import
files in the older GML format.

Location:
trunk/gsdl/perllib
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/doc.pm

    r1870 r2267  
    143143
    144144
    145 sub buffer_section {
     145sub buffer_section_gml {
    146146    my $self = shift (@_);
    147147    my ($section, $suppress_subject_info) = @_;
     
    171171    # output all the subsections
    172172    foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
    173     $all_text .= $self->buffer_section("$section.$subsection", $suppress_subject_info);
     173    $all_text .= $self->buffer_section_gml("$section.$subsection",
     174                           $suppress_subject_info);
    174175    }
    175176   
     
    180181}
    181182
     183
     184sub buffer_section_xml {
     185    my $self = shift (@_);
     186    my ($section, $dtd_metadata, $suppress_subject_info) = @_;
     187
     188    $suppress_subject_info = 0 unless defined $suppress_subject_info;
     189    my ($all_text, $data, $subsection);
     190   
     191    my $section_ptr = $self->_lookup_section ($section);
     192    my ($section_num) = $section =~ /(\d+)$/;
     193   
     194    return "" unless defined $section_ptr;
     195
     196    # output the section header (including the section number
     197    # and metadata)
     198
     199    $all_text .= "<gsdlsection";
     200    $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
     201    $all_text .= ">\n";
     202   
     203    $all_text .= "  <metadata>\n";
     204   
     205    # output metadata
     206    foreach $data (@{$section_ptr->{'metadata'}}) {
     207    my $tag_name = $data->[0];
     208    my $tag_value = &_escape_text($data->[1]);
     209
     210    unless ($suppress_subject_info && $tag_name eq "Subject")
     211    {
     212        if (defined $dtd_metadata)
     213        {
     214        $dtd_metadata->{$tag_name}++;
     215        }
     216
     217        $all_text .= "    <$tag_name>$tag_value</$tag_name>\n";
     218    }
     219    }
     220
     221    $all_text .= "  </metadata>\n";
     222
     223    # output the text
     224    $all_text .= &_escape_text($section_ptr->{'text'});
     225
     226    # output all the subsections
     227    foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
     228    $all_text .= $self->buffer_section_xml("$section.$subsection",
     229                           $dtd_metadata,
     230                           $suppress_subject_info);
     231    }
     232   
     233    # output the closing tag
     234    $all_text .=  "</gsdlsection>\n";
     235
     236    return $all_text;
     237}
     238
    182239sub output_section {
    183240    my $self = shift (@_);
    184     my ($handle, $section, $suppress_subject_info) = @_;
    185 
    186     my $all_text = $self->buffer_section($section, $suppress_subject_info);
     241    my ($handle, $section, $colname, $dtd_metadata,
     242    $suppress_subject_info) = @_;
     243
     244    my $all_text = $self->buffer_section_xml($section, $dtd_metadata,
     245                         $suppress_subject_info);
     246
     247    # xml header   
     248    if (defined $collection)
     249    {
     250    my $xml_head
     251        = '<? xml version="1.0" standalone="no" encoding="UTF-8" ?>';
     252    $xml_head .= "\n<!DOCTYPE gsdl:$colname SYSTEM \"$colname.dtd\">\n";
     253    $all_text = $xml_head.$all_text;
     254    }
     255
    187256    print $handle $all_text;
    188257}
     
    262331        print STDERR "doc::set_OID could not write to $filename\n";
    263332        } else {
    264         $self->output_section('OUTFILE', $self->get_top_section(), 1);
     333        $self->output_section('OUTFILE', $self->get_top_section(),
     334                      undef, 1);
    265335        close (OUTFILE);
    266336        }
     
    283353    if (!defined $OID)
    284354    {
    285     my $hash_text = $self->buffer_section($self->get_top_section(), 1);
     355    my $hash_text = $self->buffer_section_gml($self->get_top_section(),
     356                          undef, 1);
    286357    my $hash_len = length($hash_text);
    287358
     
    493564    my $section = $mastersection;
    494565    my $sectionref = $self;
    495 
    496 ####    print STDERR "*** mastersection = $mastersection\n";
    497566
    498567    while ($section ne "") {
  • trunk/gsdl/perllib/docprint.pm

    r782 r2267  
    6060   
    6161    # output document
    62     $doc_obj->output_section(STDOUT, $doc_obj->get_top_section());
     62    $doc_obj->output_section(STDOUT, $doc_obj->get_top_section(),
     63                 undef, undef, 0);
    6364}
    6465
  • trunk/gsdl/perllib/docsave.pm

    r2224 r2267  
    5353    $self->{'groupsize'} = $groupsize;
    5454    $self->{'gs_count'} = 0;
    55    
     55
     56    # keep an associate array of all metavalues used by collection to
     57    # help generate the XML DTD
     58    $self->{'dtd_metadata'} = {};
     59
    5660    $self->{'outhandle'} = STDERR;
    5761    $self->{'outhandle'} = $outhandle if defined $outhandle;
    58    
    5962    # set a default for the archive directory
    6063    $self->{'archive_dir'} = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
     
    113116
    114117    # save this document
    115     $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section());
     118    $doc_obj->output_section('docsave::OUTDOC',
     119                 $doc_obj->get_top_section(),
     120                 $self->{'collection'},
     121                 $self->{'dtd_metadata'},0);
     122
    116123    close OUTDOC;
    117124
     
    184191
    185192    # save this document
    186     $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section());
     193    $doc_obj->output_section('docsave::OUTDOC',
     194                 $doc_obj->get_top_section(),
     195                 $self->{'collection'},
     196                 $self->{'dtd_metadata'},0);
    187197
    188198    $self->{'gs_count'}++;
  • trunk/gsdl/perllib/plugins/GMLPlug.pm

    r1954 r2267  
    9696
    9797        my @indenting_sections = split("<gsdlsection", $gml);
    98         shift(@indenting_sections); # first entry is trivially empty
     98        shift(@indenting_sections); # skips over xml header if present
    9999
    100100        foreach $gml (@indenting_sections) {
     
    127127        }
    128128       
    129         # add the tags
     129        # add the metadata
     130        # could be stored as either attributes or ....
    130131        while ((defined $tags) && ($tags =~ s/^\s*(\S+)=\"([^\"]*)\"//o)) {
    131132            $doc_obj->add_utf8_metadata($section, $1, &GMLPlug::_unescape_text($2))
     
    134135        }
    135136       
     137        # ... or tags (xml compliant)
     138        if ($text =~ m/^\s*<metadata>/)
     139        {
     140            my $metadata;
     141            ($metadata,$text)
     142            = ($text =~ m/\s*<metadata>\s*(<.*)\s*<\/metadata>(.*)$/s);
     143            while ((defined $metadata)
     144               && ($metadata =~ s/<(.*?)>(.*?)<\/.*?>//s))
     145            {
     146            $doc_obj->add_utf8_metadata($section, $1, &GMLPlug::_unescape_text($2))
     147                if (defined $1 and defined $2);
     148            }
     149        }
     150
    136151        # add the text
     152
    137153        $doc_obj->add_utf8_text($section, $text)
    138154            if ((defined $text) && ($text ne ""));     
Note: See TracChangeset for help on using the changeset viewer.