Changeset 32512

Show
Ignore:
Timestamp:
11.10.2018 19:31:44 (6 weeks ago)
Author:
ak19
Message:

Refactoring some existing Perl code, following Dr Bainbridge's suggestions, so that MySQLPlugout, which inherits from GreenstoneXMLPlugout, can be structured in a more readable way.

Location:
main/trunk/greenstone2/perllib
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/doc.pm

    r28575 r32512  
    540540            print STDERR "doc::set_OID could not write to $filename\n"; 
    541541        } else { 
    542             my $doc_text = &docprint::get_section_xml($self, $self->get_top_section()); 
     542            my $doc_text = &docprint::get_section_xml($self); 
    543543            print OUTFILE $doc_text; 
    544544            close (OUTFILE); 
     
    561561    if (!defined $OID)  
    562562    { 
    563     my $hash_text = &docprint::get_section_xml($self, $self->get_top_section()); 
     563    my $hash_text = &docprint::get_section_xml($self); 
    564564    my $hash_len = length($hash_text); 
    565565 
  • main/trunk/greenstone2/perllib/docprint.pm

    r19214 r32512  
    3030package docprint; 
    3131 
     32use constant OUTPUT_META_ONLY => 1; 
     33use constant OUTPUT_TEXT_ONLY => 2; 
     34use constant OUTPUT_ALL => 3; 
     35 
    3236use strict; 
    3337 
    3438sub get_section_xml { 
     39    return &get_section_xml_from_root(@_); 
     40} 
     41 
     42sub get_section_xml_from_root { 
     43    my ($doc_obj, $options) = @_; 
     44    return &recursive_get_section_xml($doc_obj,$doc_obj->get_top_section(), $options); 
     45} 
     46 
     47sub recursive_get_section_xml {     
     48    my ($doc_obj, $section, $options) = @_; 
    3549     
    36     my ($doc_obj, $section) = @_; 
    37  
     50    # 'output' can be OUTPUT_ALL|OUTPUT_META_ONLY|OUTPUT_TEXT_ONLY 
     51    # If not provided, it defaults to OUTPUT_ALL. 
     52    # If OUTPUT_ALL, the metadata and full text both go into doc.xml 
     53    # If OUTPUT_META_ONLY, the metadata goes into doc.xml and full text goes elsewhere (mysql db). 
     54    # If OUTPUT_TEXT_ONLY, the full text goes into doc.xml and metadata goes elsewhere (mysql db). 
     55    # In the last two cases, an XML comment is left behind to indicate that the "missing" doc 
     56    # information is stored elsewhere. 
     57    if(!defined $options) {  
     58    $options = {'output' => OUTPUT_ALL }; 
     59    } 
     60     
    3861    my $section_ptr = $doc_obj->_lookup_section ($section); 
    3962    return "" unless defined $section_ptr; 
     
    4164    my $all_text = "<Section>\n"; 
    4265    $all_text .= "  <Description>\n"; 
    43      
    44     # output metadata 
    45     foreach my $data (@{$section_ptr->{'metadata'}}) { 
    46     my $escaped_value = &escape_text($data->[1]); 
    47     $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n"; 
     66 
     67    if($options->{'output'} == OUTPUT_ALL || $options->{'output'} == OUTPUT_META_ONLY) { 
     68    # output metadata 
     69    foreach my $data (@{$section_ptr->{'metadata'}}) { 
     70        my $escaped_value = &escape_text($data->[1]); 
     71        $all_text .= '    <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n"; 
     72    } 
     73    } else { 
     74    $all_text .= "<!-- metadata is stored elsewhere (MySQL database) -->\n"; 
    4875    } 
    4976 
     
    5279    # output the text 
    5380    $all_text .= "  <Content>"; 
    54     $all_text .= &escape_text($section_ptr->{'text'}); 
     81    if($options->{'output'} == OUTPUT_ALL || $options->{'output'} == OUTPUT_TEXT_ONLY) { 
     82    $all_text .= &escape_text($section_ptr->{'text'}); 
     83    } else { 
     84    $all_text .= "<!-- full text is stored elsewhere (MySQL database) -->\n"; 
     85    } 
    5586    $all_text .= "</Content>\n"; 
    5687     
    5788    # output all the subsections 
    5889    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) { 
    59     $all_text .= &get_section_xml($doc_obj, "$section.$subsection"); 
     90    $all_text .= &recursive_get_section_xml($doc_obj, "$section.$subsection"); 
    6091    } 
    6192     
  • main/trunk/greenstone2/perllib/oaiinfo.pm

    r32097 r32512  
    162162    # yet had done some import (and perhaps building) followed by a full re-import now. 
    163163    # Since there was no activate and we're doing a removeold/full-rebuild now, can just 
    164     # work with a new tmpdb, even though one already existed, its contents can be wiped out. 
    165         # In such a scenario, we'll be deleting tmpdb. Then there  will be no livedb nor any tmpdb 
     164    # work with a new tmpdb: even though one already existed, its contents can be wiped out. 
     165        # In such a scenario, we'll be deleting tmpdb. Then there will be no livedb nor any tmpdb 
    166166    # any more, so same situation as if importing the very first time when no oaidb exists either. 
    167167 
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneXMLPlugout.pm

    r32511 r32512  
    7676} 
    7777 
    78 sub saveas { 
     78sub old_unused_saveas { 
    7979    my $self = shift (@_); 
    8080    my ($doc_obj, $doc_dir) = @_; 
     
    117117    } 
    118118 
    119     my $section_text = &docprint::get_section_xml($doc_obj,$doc_obj->get_top_section()); 
     119    my $section_text = &docprint::get_section_xml($doc_obj); 
    120120    print $outhandler $section_text; 
    121121  
     
    138138    $self->store_output_info_reference($doc_obj); 
    139139     
     140} 
     141 
     142sub pre_saveas { 
     143    my $self = shift (@_); 
     144    my ($doc_obj, $doc_dir) = @_; 
     145    my $outhandler; 
     146    my $output_file; 
     147    if ($self->{'debug'}) { 
     148    $outhandler = STDOUT; 
     149    } 
     150    else { 
     151        
     152    $self->process_assoc_files($doc_obj, $doc_dir, ''); 
     153    $self->process_metafiles_metadata ($doc_obj); 
     154     
     155    # open up the outhandler     
     156    if ($self->is_group() && !$self->{'new_doc_dir'}) {  
     157        # we already have a handle open ?? 
     158        $outhandler = $self->{'group_outhandler'}; 
     159    } else { 
     160        $output_file = &FileUtils::filenameConcatenate($self->{'output_dir'}, $doc_dir, "doc.xml"); 
     161        # open the new handle 
     162        $self->open_xslt_pipe($output_file, $self->{'xslt_file'}); 
     163 
     164        if (defined $self->{'xslt_writer'}){ 
     165        $outhandler = $self->{'xslt_writer'}; 
     166        } 
     167        else{ 
     168        $outhandler = $self->get_output_handler($output_file); 
     169        } 
     170         
     171        if ($self->is_group()) { 
     172        $self->{'group_outhandler'} = $outhandler; 
     173        } 
     174    } 
     175    } # else not debug 
     176    binmode($outhandler,":utf8"); 
     177 
     178    # only output the header if we have started a new doc 
     179    if (!$self->is_group() || $self->{'new_doc_dir'}) { 
     180    $self->output_xml_header($outhandler); 
     181    } 
     182  
     183    return ($outhandler, $output_file); 
     184} 
     185 
     186sub saveas { 
     187    my $self = shift (@_); 
     188    my ($doc_obj, $doc_dir) = @_; 
     189 
     190    # pre 
     191    my ($outhandler, $output_file) = $self->pre_saveas(@_); 
     192    push(@_, $outhandler, $output_file); 
     193 
     194    # write out the doc xml file for the current document 
     195    my $section_text = &docprint::get_section_xml($doc_obj); 
     196    print $outhandler $section_text; 
     197 
     198    # post 
     199    $self->post_saveas(@_); 
     200} 
     201 
     202sub post_saveas { 
     203    my $self = shift (@_);     
     204    my ($doc_obj, $doc_dir, $outhandler, $output_file) = @_; 
     205     
     206    # only output the footer if we are not doing group stuff. The group file will be finished in close_group_output 
     207    if (!$self->is_group()) { 
     208    $self->output_xml_footer($outhandler); 
     209    } 
     210 
     211    # close off the output - in a group process situation, this will be done by close_group_output 
     212    if (!$self->is_group() && !$self->{'debug'}) { 
     213    if (defined $self->{'xslt_writer'}){      
     214        $self->close_xslt_pipe();  
     215    } 
     216    else { 
     217        &FileUtils::closeFileHandle($output_file, \$outhandler) if defined $output_file; 
     218    } 
     219    } 
     220    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate($doc_dir, "doc.xml");   
     221     
     222    $self->store_output_info_reference($doc_obj);     
    140223} 
    141224