root/main/trunk/greenstone2/perllib/plugouts/GreenstoneXMLPlugout.pm @ 32536

Revision 32536, 8.0 KB (checked in by ak19, 9 months ago)

First commit related to reading data back in from the SQL DB. This commit introduces the new GreenstoneSQLPlugin for this purpose, which should ideally only be used during buildcol (although its init(), deinit() and read() methods are also called by import.pl). The new plugin works with GreenstoneSQLPlugout, which writes meta and txt to the SQL DB. There are still lots of TODOs and open questions here, and some debug statements too. Some decisions also have to be run by Dr Bainbridge. Many hardcoded values still have to be parameterised (it is not always clear how), and the two cases of sending just meta and just fulltxt to the DB still have to be tested. The next commit will tidy some things up.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# GreenstoneXMLPlugout.pm -- the plugout module for Greenstone Archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package GreenstoneXMLPlugout;
27
28use strict;
29no strict 'refs';
30no strict 'subs';
31
32eval {require bytes};
33use util;
34use FileUtils;
35use BasePlugout;
36use docprint;
37
# Establish the inheritance chain at compile time: this plugout derives
# from BasePlugout.
BEGIN {
    @GreenstoneXMLPlugout::ISA = qw(BasePlugout);
}
41
# Arguments accepted by this plugout. Each entry is a hash describing one
# argument (name, description, type, default, whether it is required, and
# whether it is hidden in GLI).
# NOTE(review): the "{...}" description values look like resource-bundle
# keys that are resolved elsewhere -- confirm.
my $arguments = [
    {
        'name'      => "group_size",
        'desc'      => "{BasePlugout.group_size}",
        'type'      => "int",
        'deft'      => "1",
        'reqd'      => "no",
        'hiddengli' => "no",
    },
];

# Description of the plugout itself; 'args' points at the argument list above.
my $options = {
    'name'     => "GreenstoneXMLPlugout",
    'desc'     => "{GreenstoneXMLPlugout.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
55
# Constructor.
#
# Parameters:
#   $class           - the class being instantiated
#   $plugoutlist     - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the BasePlugout constructor
#   $hashArgOptLists - hash ref; this plugout's argument descriptions are
#                      appended to its "ArgList" entry and its option
#                      description to its "OptList" entry
#
# Returns the object built by BasePlugout's constructor, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;
    push(@$plugoutlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call rather than the indirect-object form
    # "new BasePlugout(...)", whose parse is ambiguous inside a package
    # that itself defines a sub called "new".
    my $self = BasePlugout->new($plugoutlist, $inputargs, $hashArgOptLists);

    # This plugout does no option processing of its own, so the 'info_only'
    # case needs no special handling: the object is returned the same way
    # in either mode.
    return bless $self, $class;
}
72
# Returns true when the 'group_size' setting is greater than 1, false
# otherwise.
sub is_group {
    my ($self) = @_;
    my $group_size = $self->{'group_size'};
    return ($group_size > 1);
}
77
# Legacy single-method implementation of the save path, kept for reference
# (the name marks it as unused).
#
# Parameters:
#   $doc_obj - the document object to serialise
#   $doc_dir - the document's directory, relative to $self->{'output_dir'}
#
# In debug mode the XML is written to STDOUT and no associated-file or
# metadata-file processing is done. Otherwise the XML goes either to the
# shared group handle or to a freshly opened per-document file / XSLT pipe.
sub old_unused_saveas {
    my $self = shift (@_);
    my ($doc_obj, $doc_dir) = @_;
    my $out_fh;
    my $xml_path;

    if ($self->{'debug'}) {
        $out_fh = STDOUT;
    }
    else {
        $self->process_assoc_files($doc_obj, $doc_dir, '');
        $self->process_metafiles_metadata ($doc_obj);

        # work out which handle to write to
        if (!$self->is_group() || $self->{'new_doc_dir'}) {
            # a new output file is needed
            $xml_path = &FileUtils::filenameConcatenate(
                $self->{'output_dir'}, $doc_dir, $self->get_doc_xml_filename($doc_obj));
            $self->open_xslt_pipe($xml_path, $self->{'xslt_file'});

            # prefer the XSLT writer if one was set up, else a plain file handle
            $out_fh = (defined $self->{'xslt_writer'})
                ? $self->{'xslt_writer'}
                : $self->get_output_handler($xml_path);

            # remember the handle so later documents of the group can reuse it
            if ($self->is_group()) {
                $self->{'group_outhandler'} = $out_fh;
            }
        }
        else {
            # grouping, and not a new doc dir: reuse the stored group handle
            $out_fh = $self->{'group_outhandler'};
        }
    }
    binmode($out_fh, ":utf8");

    # the header is only written at the start of a new output file
    if (!($self->is_group() && !$self->{'new_doc_dir'})) {
        $self->output_xml_header($out_fh);
    }

    my $doc_xml = &docprint::get_section_xml($doc_obj);
    print $out_fh $doc_xml;

    # a group file gets its footer later, in close_group_output
    unless ($self->is_group()) {
        $self->output_xml_footer($out_fh);
    }

    # likewise, group output is closed by close_group_output, not here
    unless ($self->is_group() || $self->{'debug'}) {
        if (defined $self->{'xslt_writer'}) {
            $self->close_xslt_pipe();
        }
        elsif (defined $xml_path) {
            &FileUtils::closeFileHandle($xml_path, \$out_fh);
        }
    }

    my $xml_name = $self->get_doc_xml_filename($doc_obj);
    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate($doc_dir, $xml_name);

    $self->store_output_info_reference($doc_obj);
}
143
# Returns the file name used for a document's XML file ("doc.xml" here).
# Subclasses may override this to produce a different name; for instance
# GreenstoneSQLPlugout uses a name like docsql.xml.
sub get_doc_xml_filename {
    my ($self, $doc_obj) = @_;
    my $xml_filename = "doc.xml";
    return $xml_filename;
}
151
# First stage of saveas(): prepares the output handle for a document.
#
# Parameters:
#   $doc_obj - the document object about to be written
#   $doc_dir - the document's directory, relative to $self->{'output_dir'}
#
# Processes the document's associated files and metadata files, selects or
# opens the handle the XML will be written to, sets it to UTF-8 and writes
# the XML header when a new output file has been started.
#
# Returns the list ($outhandler, $output_file). $output_file is undef in
# debug mode (output goes to STDOUT) and when an already open group handle
# is reused.
sub pre_saveas {
    my $self = shift (@_);
    my ($doc_obj, $doc_dir) = @_;
    my $outhandler;
    my $output_file;

    $self->process_assoc_files($doc_obj, $doc_dir, '');
    $self->process_metafiles_metadata ($doc_obj);

    if ($self->{'debug'}) {
        $outhandler = STDOUT;
    }
    else {
        # open up the outhandler
        if ($self->is_group() && !$self->{'new_doc_dir'}) {
            # we already have a handle open ??
            $outhandler = $self->{'group_outhandler'};
        } else {
            $output_file = &FileUtils::filenameConcatenate(
                $self->{'output_dir'}, $doc_dir, $self->get_doc_xml_filename($doc_obj));
            # open the new handle
            $self->open_xslt_pipe($output_file, $self->{'xslt_file'});

            # prefer the XSLT writer if one was set up, else a plain file handle
            if (defined $self->{'xslt_writer'}) {
                $outhandler = $self->{'xslt_writer'};
            }
            else {
                $outhandler = $self->get_output_handler($output_file);
            }

            # remember the handle so later documents of the group can reuse it
            if ($self->is_group()) {
                $self->{'group_outhandler'} = $outhandler;
            }
        }
    } # else not debug
    binmode($outhandler, ":utf8");

    # only output the header if we have started a new doc
    if (!$self->is_group() || $self->{'new_doc_dir'}) {
        $self->output_xml_header($outhandler);
    }

    return ($outhandler, $output_file);
}
197
# Writes one document out as XML, in three stages: pre_saveas() prepares the
# output handle, the document's section XML is printed to it, and
# post_saveas() finishes off the output.
#
# Parameters:
#   $doc_obj - the document object to serialise
#   $doc_dir - the document's directory, relative to the output directory
#
# Returns whatever post_saveas() returns.
sub saveas {
    my $self = shift (@_);
    my ($doc_obj, $doc_dir) = @_;

    # pre
    my ($outhandler, $output_file) = $self->pre_saveas($doc_obj, $doc_dir);

    # write out the doc xml file for the current document
    my $section_text = &docprint::get_section_xml($doc_obj);
    print $outhandler $section_text;

    # post
    # Arguments are passed explicitly rather than by appending to @_, so the
    # handle and file name always arrive in the positions post_saveas
    # unpacks, even if the caller supplied extra arguments.
    $self->post_saveas($doc_obj, $doc_dir, $outhandler, $output_file);
}
213
# Final stage of saveas(): finishes off the output for one document.
#
# Parameters:
#   $doc_obj     - the document object that was written
#   $doc_dir     - the document's directory
#   $outhandler  - the handle the document XML was written to
#   $output_file - path of the file behind $outhandler, or undef
#
# When not grouping, writes the XML footer and (outside debug mode) closes
# the output. In all cases records the document's short file name in
# $self->{'short_doc_file'} and stores the output info reference.
sub post_saveas {
    my $self = shift (@_);
    my ($doc_obj, $doc_dir, $outhandler, $output_file) = @_;

    # a group file gets its footer later, in close_group_output
    unless ($self->is_group()) {
        $self->output_xml_footer($outhandler);
    }

    # likewise, group output is closed by close_group_output, not here
    unless ($self->is_group() || $self->{'debug'}) {
        if (defined $self->{'xslt_writer'}) {
            $self->close_xslt_pipe();
        }
        elsif (defined $output_file) {
            &FileUtils::closeFileHandle($output_file, \$outhandler);
        }
    }

    my $xml_name = $self->get_doc_xml_filename($doc_obj);
    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate($doc_dir, $xml_name);

    $self->store_output_info_reference($doc_obj);
}
237
# Writes the opening of an archive XML file to the given handle: the XML
# declaration, the Archive DOCTYPE and the opening <Archive> tag.
sub output_xml_header {
    my ($self, $outhandle) = @_;

    my $xml_declaration = '<?xml version="1.0" encoding="utf-8" standalone="no"?>';
    my $doctype = '<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">';

    print $outhandle $xml_declaration . "\n";
    print $outhandle $doctype . "\n";
    print $outhandle "<Archive>\n";
}
246
# Writes the closing </Archive> tag to the given handle.
sub output_xml_footer {
    my ($self, $outhandle) = @_;

    my $closing_tag = "</Archive>\n";
    print $outhandle $closing_tag;
}
253
# Finishes off a group output file: writes the XML footer to the group
# handle and closes it, then optionally gzips the output file.
#
# Returns 1 on success, or 0 if gzipping was requested but the gzipped
# file does not exist afterwards.
sub close_group_output
{
    my $self = shift(@_);

    # make sure that the handle has been opened - it won't be if we failed
    # to import any documents...
    my $outhandle = $self->{'group_outhandler'};
    if (defined($outhandle) && defined(fileno($outhandle))) {
        $self->output_xml_footer($outhandle);
        &FileUtils::closeFileHandle("", \$outhandle);
        undef $self->{'group_outhandler'};
    }

    ### TODO - from here is old code. check that it is still valid.
    # NOTE(review): the original appended ".gz" only to a local copy of
    # $self->{'short_doc_file'}, so the stored short file name never
    # reflected the compression. That dead local has been removed; confirm
    # whether the stored name is meant to be updated.
    if ($self->{'gzip'}) {
        my $doc_file = $self->{'gs_filename'};

        # List-form system() bypasses the shell, so file names containing
        # spaces or shell metacharacters are passed to gzip intact.
        if (defined($doc_file) && length($doc_file)) {
            system('gzip', $doc_file);
        }
        else {
            $doc_file = "";
        }

        $doc_file .= ".gz";
        if (!&FileUtils::fileExists($doc_file)) {
            my $log_handle = $self->{'output_handle'};
            print $log_handle "error while gzipping: $doc_file doesn't exist\n";
            return 0;
        }
    }

    return 1;
}
285
286
2871;
288
Note: See TracBrowser for help on using the browser.