source: main/trunk/greenstone2/perllib/plugouts/GreenstoneXMLPlugout.pm@ 32536

Last change on this file since 32536 was 32536, checked in by ak19, 6 years ago

First commit to do with reading back in from the SQL DB. This commit introduces the new GreenstoneSQLPlugin for this purpose, which should ideally only be used during buildcol (but its init(), deinit() and read() methods are also called on import.pl). The new plugin works with GreenstoneSQLPlugout which wrote meta and txt to the SQL DB. Lots of TODOs and questions still here, some debug statements too. Also have to run some decisions by Dr Bainbridge. There are many hardcoded values which still have to be parameterised (not always completely sure how) and still have to test the 2 cases of sending just meta and just fulltxt to db. Next commit will tidy some things up.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.0 KB
RevLine 
[12330]1###########################################################################
2#
[17743]3# GreenstoneXMLPlugout.pm -- the plugout module for Greenstone Archives
[12330]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
[17743]26package GreenstoneXMLPlugout;
[12330]27
28use strict;
29no strict 'refs';
[13172]30no strict 'subs';
[12330]31
32eval {require bytes};
33use util;
[27306]34use FileUtils;
[17203]35use BasePlugout;
[13172]36use docprint;
[12330]37
# Establish the inheritance chain at compile time: this plugout derives
# from BasePlugout.
BEGIN {
    @GreenstoneXMLPlugout::ISA = qw(BasePlugout);
}
41
# Description of the single plugout-specific argument, group_size.
my %group_size_argument = (
    'name'      => "group_size",
    'desc'      => "{BasePlugout.group_size}",
    'type'      => "int",
    'deft'      => "1",
    'reqd'      => "no",
    'hiddengli' => "no",
);

# Argument list pushed onto the shared "ArgList" by new().
my $arguments = [ \%group_size_argument ];

# Option record pushed onto the shared "OptList" by new().
my $options = {
    'name'     => "GreenstoneXMLPlugout",
    'desc'     => "{GreenstoneXMLPlugout.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
[12330]55
# Constructor.
#
# Parameters:
#   $plugoutlist     - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to BasePlugout's constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays are
#                      extended with this plugout's arguments and options
#
# Returns the object created by BasePlugout's constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;
    push(@$plugoutlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Direct method call rather than the indirect-object form
    # "new BasePlugout(...)", which Perl can mis-parse.
    my $self = BasePlugout->new($plugoutlist, $inputargs, $hashArgOptLists);

    # No extra set-up is performed here, so the object is returned as-is
    # whether or not $self->{'info_only'} is set.
    return bless $self, $class;
}
72
# True when more than one document is written per output file, i.e. when
# the 'group_size' setting is greater than 1.
sub is_group {
    my ($self) = @_;

    my $docs_per_file = $self->{'group_size'};
    return ($docs_per_file > 1);
}
77
# Single-routine version of the save logic that saveas() now performs via
# pre_saveas() and post_saveas(). Nothing in this file calls it.
#
# Parameters:
#   $doc_obj - the document object to write out
#   $doc_dir - the document's directory, joined onto $self->{'output_dir'}
#
# Unlike pre_saveas(), the associated-file and metafile processing here is
# only performed when not in debug mode.
sub old_unused_saveas {
    my $self = shift (@_);
    my ($doc_obj, $doc_dir) = @_;
    my $outhandler;
    my $output_file;
    if ($self->{'debug'}) {
        # debug mode: write to standard output instead of a file
        $outhandler = STDOUT;
    }
    else {

        $self->process_assoc_files($doc_obj, $doc_dir, '');
        $self->process_metafiles_metadata ($doc_obj);

        # open up the outhandler
        if ($self->is_group() && !$self->{'new_doc_dir'}) {
            # we already have a handle open ??
            $outhandler = $self->{'group_outhandler'};
        } else {
            $output_file = &FileUtils::filenameConcatenate(
                $self->{'output_dir'}, $doc_dir, $self->get_doc_xml_filename($doc_obj));
            # open the new handle
            $self->open_xslt_pipe($output_file, $self->{'xslt_file'});

            # prefer the xslt writer when open_xslt_pipe left one defined
            if (defined $self->{'xslt_writer'}){
                $outhandler = $self->{'xslt_writer'};
            }
            else{
                $outhandler = $self->get_output_handler($output_file);
            }

            # remember the handle so later documents in the group reuse it
            if ($self->is_group()) {
                $self->{'group_outhandler'} = $outhandler;
            }
        }
    } # else not debug
    binmode($outhandler,":utf8");

    # only output the header if we have started a new doc
    if (!$self->is_group() || $self->{'new_doc_dir'}) {
        $self->output_xml_header($outhandler);
    }

    my $section_text = &docprint::get_section_xml($doc_obj);
    print $outhandler $section_text;

    # only output the footer if we are not doing group stuff. The group file will be finished in close_group_output
    if (!$self->is_group()) {
        $self->output_xml_footer($outhandler);
    }

    # close off the output - in a group process situation, this will be done by close_group_output
    if (!$self->is_group() && !$self->{'debug'}) {
        if (defined $self->{'xslt_writer'}){
            $self->close_xslt_pipe();
        }
        else {
            &FileUtils::closeFileHandle($output_file, \$outhandler) if defined $output_file;
        }
    }
    # record the document's file path relative to the output directory
    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate(
        $doc_dir, $self->get_doc_xml_filename($doc_obj));

    $self->store_output_info_reference($doc_obj);

}
143
# Name of the per-document XML file written by this plugout.
# Subclasses may override this to use another name (GreenstoneSQLPlugout,
# for instance, to produce docsql.xml). The $doc_obj argument is accepted
# but not consulted by this implementation.
sub get_doc_xml_filename {
    my ($self, $doc_obj) = @_;

    my $xml_filename = "doc.xml";
    return $xml_filename;
}
151
# First stage of saveas(): process the document's associated files and
# metafile metadata, obtain the output handle and, when a new output file
# is being started, write the XML header to it.
#
# Parameters:
#   $doc_obj - the document object about to be written
#   $doc_dir - the document's directory, joined onto $self->{'output_dir'}
#
# Returns the list ($outhandler, $output_file). $output_file is only set
# when a new file is opened; it is undef in debug mode and when an existing
# group handle is reused.
sub pre_saveas {
    my $self = shift (@_);
    my ($doc_obj, $doc_dir) = @_;
    my $outhandler;
    my $output_file;

    $self->process_assoc_files($doc_obj, $doc_dir, '');
    $self->process_metafiles_metadata ($doc_obj);

    if ($self->{'debug'}) {
        # Hand back a real glob reference. A bareword STDOUT is only the
        # string "STDOUT" here, which cannot be used as a filehandle by
        # code compiled under "use strict 'refs'".
        $outhandler = \*STDOUT;
    }
    else {

        # open up the outhandler
        if ($self->is_group() && !$self->{'new_doc_dir'}) {
            # continuing an existing group file: reuse the handle stored
            # when that file was opened
            $outhandler = $self->{'group_outhandler'};
        } else {
            $output_file = &FileUtils::filenameConcatenate(
                $self->{'output_dir'}, $doc_dir, $self->get_doc_xml_filename($doc_obj));
            # open the new handle
            $self->open_xslt_pipe($output_file, $self->{'xslt_file'});

            # prefer the xslt writer when open_xslt_pipe left one defined
            if (defined $self->{'xslt_writer'}) {
                $outhandler = $self->{'xslt_writer'};
            }
            else {
                $outhandler = $self->get_output_handler($output_file);
            }

            # remember the handle so later documents in the group reuse it
            if ($self->is_group()) {
                $self->{'group_outhandler'} = $outhandler;
            }
        }
    } # else not debug
    binmode($outhandler, ":utf8");

    # only output the header if we have started a new doc
    if (!$self->is_group() || $self->{'new_doc_dir'}) {
        $self->output_xml_header($outhandler);
    }

    return ($outhandler, $output_file);
}
197
# Write one document out as XML in three stages: pre_saveas() supplies the
# output handle (and file name), the document's section XML is printed to
# that handle, and post_saveas() finishes off.
#
# Parameters:
#   $doc_obj - the document object to write
#   $doc_dir - the document's directory
sub saveas {
    my ($self, @save_args) = @_;
    my ($doc_obj, $doc_dir) = @save_args;

    # pre: every argument received is forwarded as-is
    my ($outhandler, $output_file) = $self->pre_saveas(@save_args);

    # write out the doc xml file for the current document
    print {$outhandler} &docprint::get_section_xml($doc_obj);

    # post: the original arguments followed by the handle and file name
    $self->post_saveas(@save_args, $outhandler, $output_file);
}
213
# Final stage of saveas(): finish and close a stand-alone document file,
# then record where the document was written.
#
# Parameters:
#   $doc_obj     - the document object that was written
#   $doc_dir     - the document's directory
#   $outhandler  - the handle returned by pre_saveas()
#   $output_file - the file name returned by pre_saveas() (may be undef)
sub post_saveas {
    my ($self, $doc_obj, $doc_dir, $outhandler, $output_file) = @_;

    # Group files are finished and closed by close_group_output instead,
    # so the footer and the close only apply to stand-alone documents.
    my $standalone_doc = !$self->is_group();

    if ($standalone_doc) {
        $self->output_xml_footer($outhandler);
    }

    # nothing is closed in debug mode
    if ($standalone_doc && !$self->{'debug'}) {
        if (defined $self->{'xslt_writer'}) {
            $self->close_xslt_pipe();
        }
        elsif (defined $output_file) {
            &FileUtils::closeFileHandle($output_file, \$outhandler);
        }
    }

    my $xml_filename = $self->get_doc_xml_filename($doc_obj);
    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate($doc_dir, $xml_filename);

    $self->store_output_info_reference($doc_obj);
}
237
# Print the opening of an archive XML file to $outhandle: the XML
# declaration, the Archive DOCTYPE and the opening <Archive> tag.
sub output_xml_header {
    my ($self, $outhandle) = @_;

    my @header_lines = (
        '<?xml version="1.0" encoding="utf-8" standalone="no"?>' . "\n",
        "<!DOCTYPE Archive SYSTEM \"http://greenstone.org/dtd/Archive/1.0/Archive.dtd\">\n",
        "<Archive>\n",
    );

    foreach my $header_line (@header_lines) {
        print {$outhandle} $header_line;
    }
}
[13172]246
# Print the closing </Archive> tag that ends an archive XML file.
sub output_xml_footer {
    my ($self, $outhandle) = @_;

    my $closing_tag = "</Archive>\n";
    print {$outhandle} $closing_tag;
}
253
# Finish off a group output file: write the XML footer and close the group
# handle if one is open, then gzip the file when the 'gzip' setting is on.
#
# Returns 1 on success, or 0 when gzip was requested but the compressed
# file does not exist afterwards (a message is printed to
# $self->{'output_handle'} in that case).
sub close_group_output
{
    my $self = shift(@_);

    # make sure that the handle has been opened - it won't be if we failed
    # to import any documents... (so check it is defined before asking
    # for its file descriptor)
    my $outhandle = $self->{'group_outhandler'};
    if (defined($outhandle) && defined(fileno($outhandle))) {
        $self->output_xml_footer($outhandle);
        &FileUtils::closeFileHandle("", \$outhandle);
        undef $self->{'group_outhandler'};
    }

    # NOTE(review): the previous code also read $self->{'gs_OID'} and
    # appended ".gz" to a local copy of $self->{'short_doc_file'}; neither
    # value was used or stored, so both locals have been dropped. Confirm
    # whether short_doc_file was meant to be updated on the object.

    ### TODO - from here is old code. check that it is still valid.
    if ($self->{'gzip'}) {
        my $doc_file = $self->{'gs_filename'};
        # List-form system() runs gzip without a shell, so spaces or shell
        # metacharacters in the file name cannot break or alter the command.
        system('gzip', $doc_file);
        $doc_file .= ".gz";
        if (!&FileUtils::fileExists($doc_file)) {
            my $message_handle = $self->{'output_handle'};
            print $message_handle "error while gzipping: $doc_file doesn't exist\n";
            return 0;
        }
    }

    return 1;
}
285
286
[12330]2871;
288
Note: See TracBrowser for help on using the repository browser.