Changeset 32512


Ignore:
Timestamp:
2018-10-11T19:31:44+13:00 (6 years ago)
Author:
ak19
Message:

Refactoring some existing Perl code, as per Dr Bainbridge's ideas, so that MySQLPlugout, which inherits from GreenstoneXMLPlugout, can be structured to be easier to read.

Location:
main/trunk/greenstone2/perllib
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/doc.pm

    r28575 r32512  
    540540            print STDERR "doc::set_OID could not write to $filename\n";
    541541        } else {
    542             my $doc_text = &docprint::get_section_xml($self, $self->get_top_section());
     542            my $doc_text = &docprint::get_section_xml($self);
    543543            print OUTFILE $doc_text;
    544544            close (OUTFILE);
     
    561561    if (!defined $OID)
    562562    {
    563     my $hash_text = &docprint::get_section_xml($self, $self->get_top_section());
     563    my $hash_text = &docprint::get_section_xml($self);
    564564    my $hash_len = length($hash_text);
    565565
  • main/trunk/greenstone2/perllib/docprint.pm

    r19214 r32512  
    3030package docprint;
    3131
     32use constant OUTPUT_META_ONLY => 1;
     33use constant OUTPUT_TEXT_ONLY => 2;
     34use constant OUTPUT_ALL => 3;
     35
    3236use strict;
    3337
    3438sub get_section_xml {
     39    return &get_section_xml_from_root(@_);
     40}
     41
     42sub get_section_xml_from_root {
     43    my ($doc_obj, $options) = @_;
     44    return &recursive_get_section_xml($doc_obj,$doc_obj->get_top_section(), $options);
     45}
     46
     47sub recursive_get_section_xml {   
     48    my ($doc_obj, $section, $options) = @_;
    3549   
    36     my ($doc_obj, $section) = @_;
    37 
     50    # 'output' can be OUTPUT_ALL|OUTPUT_META_ONLY|OUTPUT_TEXT_ONLY
     51    # If not provided, it defaults to OUTPUT_ALL.
     52    # If OUTPUT_ALL, the metadata and full text both go into doc.xml
     53    # If OUTPUT_META_ONLY, the metadata goes into doc.xml and full text goes elsewhere (mysql db).
     54    # If OUTPUT_TEXT_ONLY, the full text goes into doc.xml and metadata goes elsewhere (mysql db).
     55    # In the last two cases, an XML comment is left behind to indicate that the "missing" doc
     56    # information is stored elsewhere.
     57    if(!defined $options) {
     58    $options = {'output' => OUTPUT_ALL };
     59    }
     60   
    3861    my $section_ptr = $doc_obj->_lookup_section ($section);
    3962    return "" unless defined $section_ptr;
     
    4164    my $all_text = "<Section>\n";
    4265    $all_text .= "  <Description>\n";
    43    
    44     # output metadata
    45     foreach my $data (@{$section_ptr->{'metadata'}}) {
    46     my $escaped_value = &escape_text($data->[1]);
    47     $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
     66
     67    if($options->{'output'} == OUTPUT_ALL || $options->{'output'} == OUTPUT_META_ONLY) {
     68    # output metadata
     69    foreach my $data (@{$section_ptr->{'metadata'}}) {
     70        my $escaped_value = &escape_text($data->[1]);
     71        $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
     72    }
     73    } else {
     74    $all_text .= "<!-- metadata is stored elsewhere (MySQL database) -->\n";
    4875    }
    4976
     
    5279    # output the text
    5380    $all_text .= "  <Content>";
    54     $all_text .= &escape_text($section_ptr->{'text'});
     81    if($options->{'output'} == OUTPUT_ALL || $options->{'output'} == OUTPUT_TEXT_ONLY) {
     82    $all_text .= &escape_text($section_ptr->{'text'});
     83    } else {
     84    $all_text .= "<!-- full text is stored elsewhere (MySQL database) -->\n";
     85    }
    5586    $all_text .= "</Content>\n";
    5687   
    5788    # output all the subsections
    5889    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
    59     $all_text .= &get_section_xml($doc_obj, "$section.$subsection");
     90    $all_text .= &recursive_get_section_xml($doc_obj, "$section.$subsection");
    6091    }
    6192   
  • main/trunk/greenstone2/perllib/oaiinfo.pm

    r32097 r32512  
    162162    # yet had done some import (and perhaps building) followed by a full re-import now.
    163163    # Since there was no activate and we're doing a removeold/full-rebuild now, can just
    164     # work with a new tmpdb, even though one already existed, its contents can be wiped out.
    165         # In such a scenario, we'll be deleting tmpdb. Then there  will be no livedb nor any tmpdb
     164    # work with a new tmpdb: even though one already existed, its contents can be wiped out.
     165        # In such a scenario, we'll be deleting tmpdb. Then there will be no livedb nor any tmpdb
    166166    # any more, so same situation as if importing the very first time when no oaidb exists either.
    167167
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneXMLPlugout.pm

    r32511 r32512  
    7676}
    7777
    78 sub saveas {
     78sub old_unused_saveas {
    7979    my $self = shift (@_);
    8080    my ($doc_obj, $doc_dir) = @_;
     
    117117    }
    118118
    119     my $section_text = &docprint::get_section_xml($doc_obj,$doc_obj->get_top_section());
     119    my $section_text = &docprint::get_section_xml($doc_obj);
    120120    print $outhandler $section_text;
    121121 
     
    138138    $self->store_output_info_reference($doc_obj);
    139139   
     140}
     141
     142sub pre_saveas {
     143    my $self = shift (@_);
     144    my ($doc_obj, $doc_dir) = @_;
     145    my $outhandler;
     146    my $output_file;
     147    if ($self->{'debug'}) {
     148    $outhandler = STDOUT;
     149    }
     150    else {
     151       
     152    $self->process_assoc_files($doc_obj, $doc_dir, '');
     153    $self->process_metafiles_metadata ($doc_obj);
     154   
     155    # open up the outhandler   
     156    if ($self->is_group() && !$self->{'new_doc_dir'}) {
     157        # we already have a handle open ??
     158        $outhandler = $self->{'group_outhandler'};
     159    } else {
     160        $output_file = &FileUtils::filenameConcatenate($self->{'output_dir'}, $doc_dir, "doc.xml");
     161        # open the new handle
     162        $self->open_xslt_pipe($output_file, $self->{'xslt_file'});
     163
     164        if (defined $self->{'xslt_writer'}){
     165        $outhandler = $self->{'xslt_writer'};
     166        }
     167        else{
     168        $outhandler = $self->get_output_handler($output_file);
     169        }
     170       
     171        if ($self->is_group()) {
     172        $self->{'group_outhandler'} = $outhandler;
     173        }
     174    }
     175    } # else not debug
     176    binmode($outhandler,":utf8");
     177
     178    # only output the header if we have started a new doc
     179    if (!$self->is_group() || $self->{'new_doc_dir'}) {
     180    $self->output_xml_header($outhandler);
     181    }
     182 
     183    return ($outhandler, $output_file);
     184}
     185
     186sub saveas {
     187    my $self = shift (@_);
     188    my ($doc_obj, $doc_dir) = @_;
     189
     190    # pre
     191    my ($outhandler, $output_file) = $self->pre_saveas(@_);
     192    push(@_, $outhandler, $output_file);
     193
     194    # write out the doc xml file for the current document
     195    my $section_text = &docprint::get_section_xml($doc_obj);
     196    print $outhandler $section_text;
     197
     198    # post
     199    $self->post_saveas(@_);
     200}
     201
     202sub post_saveas {
     203    my $self = shift (@_);   
     204    my ($doc_obj, $doc_dir, $outhandler, $output_file) = @_;
     205   
     206    # only output the footer if we are not doing group stuff. The group file will be finished in close_group_output
     207    if (!$self->is_group()) {
     208    $self->output_xml_footer($outhandler);
     209    }
     210
     211    # close off the output - in a group process situation, this will be done by close_group_output
     212    if (!$self->is_group() && !$self->{'debug'}) {
     213    if (defined $self->{'xslt_writer'}){     
     214        $self->close_xslt_pipe();
     215    }
     216    else {
     217        &FileUtils::closeFileHandle($output_file, \$outhandler) if defined $output_file;
     218    }
     219    }
     220    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate($doc_dir, "doc.xml"); 
     221   
     222    $self->store_output_info_reference($doc_obj);   
    140223}
    141224
Note: See TracChangeset for help on using the changeset viewer.