source: gs2-extensions/open-office/trunk/src/perllib/plugins/OpenOfficeConverter.pm@ 32323

Last change on this file since 32323 was 32323, checked in by ak19, 6 years ago
  1. Dr Bainbridge fixed the way OpenOfficeConverter launched soffice in headless mode: it was incorrectly doing the 2 greater than ampersand 1 (redirecting stderr to stdout) before redirecting to /dev/null (and nul on windows). It has to be redirecting to /dev/null (and nul) before doing 2 greater than ampersand 1. The error had resulted in GLI failing to launch properly, with the call to pluginfo on the entire pluginlist blocking. SafeProcess was getting no data at all on the pluginfo process' stdout, not even eof/eos, so the BufferedReader.readLine() (and any read()) blocked resulting in that InputStreamGobbler.join() call not terminating.
  • Property svn:executable set to *
File size: 12.8 KB
Line 
1###########################################################################
2#
3# OpenOfficeConverter - helper plugin that does office document conversion
4# using jodconverter combined with OpenOffice
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2010 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package OpenOfficeConverter;
28
29use ConvertBinaryFile;
30use BaseMediaConverter;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35use gsprintf 'gsprintf';
36
37# these two variables mustn't be initialised here or they will get stuck
38# at those values.
39our $openoffice_conversion_available;
40our $no_openoffice_conversion_reason;
41
# Compile-time probe: decides whether OpenOffice based conversion can be
# offered and, if the prerequisites are present, tries to start soffice in
# headless mode listening on SOFFICE_HOST:SOFFICE_PORT.
#
# Outcome is recorded in the package variables
#   $openoffice_conversion_available  (1 or 0)
#   $no_openoffice_conversion_reason  (key of the explanatory message, or "")
BEGIN {
    @OpenOfficeConverter::ISA = ('BaseMediaConverter');

    # Start out optimistic; each failed check below flips these.
    $openoffice_conversion_available = 1;
    $no_openoffice_conversion_reason = "";

    my $extension_home = $ENV{'GEXT_OPENOFFICE'};
    #my $jodconverter_jar = &FileUtils::filenameConcatenate($extension_home,"lib","java","jodconverter.jar");
    my $jodconverter_jar = defined $extension_home
        ? &FileUtils::filenameConcatenate($extension_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar")
        : undef;

    if (!defined $extension_home) {
        # The OpenOffice extension itself has not been set up.
        $openoffice_conversion_available = 0;
        $no_openoffice_conversion_reason = "gextopenofficenotinstalled";
    }
    elsif (!-e $jodconverter_jar) {
        # The extension is there, but the jodconverter jar is missing.
        #print STDERR "Failed to find $jodconverter_jar\n";
        $openoffice_conversion_available = 0;
        $no_openoffice_conversion_reason = "gextjodconverternotinstalled";
    }
    else {
        # No more JODCONVERTER_PORT env var: jodconverter and soffice are meant
        # to talk on the same port, so only SOFFICE_PORT is used. 8100 is the
        # port jodconverter expects the office process to listen on by default.
        $ENV{'SOFFICE_HOST'} = "localhost" unless defined $ENV{'SOFFICE_HOST'};
        $ENV{'SOFFICE_PORT'} = "8100" unless defined $ENV{'SOFFICE_PORT'};

        # Basic launch command shared by all platforms; the platform specific
        # branches below decorate it.
        my $launch_cmd = "soffice \"--accept=socket,host=$ENV{'SOFFICE_HOST'},port=$ENV{'SOFFICE_PORT'};urp;StarOffice.ServiceManager\" --headless";
        my $exit_status = 0;

        if ($ENV{'GSDLOS'} =~ m/^windows$/) {
            # SOFFICE_HOME matters when the Greenstone server and the office
            # installation live on a remote windows machine; guess the default
            # install location when the user has not set it.
            $ENV{'SOFFICE_HOME'} = &FileUtils::filenameConcatenate($ENV{'ProgramFiles'},"OpenOffice.org 3")
                unless defined $ENV{'SOFFICE_HOME'};

            my $program_dir = &FileUtils::filenameConcatenate($ENV{'SOFFICE_HOME'},"program");
            &util::envvar_append("PATH",$program_dir) if (-d $program_dir);

            # Silently check that soffice can be found first, otherwise windows
            # displays a popup. which.exe should be in bin/windows.
            $exit_status = system("which soffice >nul 2>&1");

            # Launch through "start" so that the prompt returns; otherwise
            # client-gli tends to hang waiting for soffice.
            # The order of >nul and 2>&1 matters (see the unix branch).
            # If soffice was not found, $exit_status is non-zero and the
            # launch further down is skipped.
            $launch_cmd = "start \"soffice process\" $launch_cmd >nul 2>&1" if ($exit_status == 0);
            #print STDERR "***** Tried to start-up OpenOffice with:\n$launch_cmd\n";
        }
        else {
            # On unix the process has to be put in the background explicitly
            # by appending &.
            #
            # IMPORTANT: the redirect to /dev/null must come BEFORE 2>&1,
            # i.e. "$some_cmd >/dev/null 2>&1". With the order reversed,
            # SafeProcess blocks forever reading the stdout of the pluginfo
            # process when GLI starts up, because it never sees any data or
            # eof/eos on that stream.
            $launch_cmd .= " >/dev/null 2>&1 &";
        }

        #print STDERR "@@@@ running: $launch_cmd\n";

        # Only attempt the launch when the preceding check (if any) succeeded.
        $exit_status = system($launch_cmd) if ($exit_status == 0);

        if ($exit_status != 0) {
            #print STDERR "Failed to run: $launch_cmd\n";
            #print STDERR "$!\n";
            $openoffice_conversion_available = 0;
            $no_openoffice_conversion_reason = "openofficenotinstalled";
        }
    }
}
133
# Plugin argument: the port on which the office process is expected to be
# listening (it is handed to jodconverter via --port when converting).
my $openoffice_port_argument = {
    'name'  => "openoffice_port",
    'desc'  => "{OpenOfficeConverter.openoffice_port}",
    'type'  => "int",
    'deft'  => "8100",
    'range' => "81,",
    'reqd'  => "no",
};

# Complete argument list contributed by this plugin.
my $arguments = [ $openoffice_port_argument ];


# Plugin description record that new() pushes onto the caller's "OptList".
my $options = {
    'name'     => "OpenOfficeConverter",
    'desc'     => "{OpenOfficeConverter.desc}",
    'abstract' => "yes",
    'inherits' => "yes",
    'args'     => $arguments,
};
149
# Constructor.
#
# Parameters:
#   $class           - class name being instantiated
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through to the BaseMediaConverter constructor
#   $hashArgOptLists - hash ref; this plugin's argument specs are appended to
#                      its "ArgList" entry and its option record to "OptList"
#   $auxilary        - passed through to the BaseMediaConverter constructor
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
# When the object is flagged 'info_only' no further setup is done. Otherwise
# the result of the compile-time availability probe is stored on the object,
# and a message is written to the object's 'outhandle' if conversion is not
# available.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists, $auxilary) = @_;

    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call rather than the indirect object form
    # ("new BaseMediaConverter(...)"), whose parsing depends on which names
    # are known at compile time.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs,
                                       $hashArgOptLists, $auxilary);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    if (!$openoffice_conversion_available) {
        $self->{'no_openoffice_conversion_reason'} = $no_openoffice_conversion_reason;

        my $outhandle = $self->{'outhandle'};
        &gsprintf($outhandle, "OpenOfficeConverter: {OpenOfficeConverter.noconversionavailable} ({OpenOfficeConverter.$no_openoffice_conversion_reason})\n");
    }

    $self->{'openoffice_conversion_available'} = $openoffice_conversion_available;

    return bless $self, $class;
}
177
178# launch_cmd variable seems to be entirely unused
# Per-run initialisation.
#
# Parameters:
#   $self       - the plugin object
#   $verbosity, $outhandle, $failhandle
#               - accepted as part of the init interface; not used here
#
# Effects:
#   - when conversion is available, stores the soffice launch command built
#     from the 'openoffice_port' option in $self->{'openoffice_launch_cmd'}
#     (nothing in this file reads that value back)
#   - resets $self->{'ootmp_file_paths'}, the list of temporary output files
#     that clean_up_temporary_files() removes, to an empty array ref
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    if ($openoffice_conversion_available) {
        my $oo_port = $self->{'openoffice_port'};

        my $launch_cmd = "soffice";
        $launch_cmd .= " \"--accept=socket,host=localhost,port=$oo_port;urp;StarOffice.ServiceManager\"";
        $launch_cmd .= " --headless";
        $self->{'openoffice_launch_cmd'} = $launch_cmd;
    }

    # An empty array reference: assigning () to a hash element stores undef,
    # which is not an empty list.
    $self->{'ootmp_file_paths'} = [];

    return;
}
194
# Tear-down counterpart of init(): delegates to clean_up_temporary_files()
# and hands back whatever that method returns.
sub deinit {
    my ($self) = @_;

    return $self->clean_up_temporary_files();
}
200
201
# Private helper: works out the office installation directory to pass to
# jodconverter as -Doffice.home.
#
# A user-defined SOFFICE_HOME wins (it may point at libreoffice or openoffice,
# since soffice launches both). On windows its short (8.3) form is used when
# available; if no short form can be obtained the path is used as given rather
# than passing an empty office.home. Without SOFFICE_HOME, a fixed windows
# default or the first existing well-known unix directory is returned, ending
# with the default linux officeHome that jodconverter itself tries.
sub _office_home_for_jodconverter {
    my $on_windows = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/) ? 1 : 0;
    my $soffice_home = $ENV{'SOFFICE_HOME'};

    if (defined $soffice_home) {
        return $soffice_home unless $on_windows;

        my $short_path = Win32::GetShortPathName($soffice_home);
        return (defined $short_path && $short_path ne "") ? $short_path : $soffice_home;
    }

    # SOFFICE_HOME not user-defined, try defaults
    return "C:\\PROGRA~1\\OPENOF~1.ORG" if $on_windows;

    foreach my $candidate ("/usr/lib/libreoffice",        # try libreoffice first
                           "/usr/lib/openoffice",         # fallback
                           "/usr/lib64/openoffice.org3")  # 64 bit linux such as CentOS
    {
        return $candidate if (-d $candidate);
    }

    return "/opt/openoffice.org3";
}

# Converts a document with jodconverter.
#
# Parameters (positional):
#   $source_file_full_path - full path of the document to convert
#   $target_file_type      - file extension of the wanted output (used as the
#                            suffix of the generated target file name)
#   $convert_options       - optional; accepted but currently unused
#   $convert_id            - optional; appended to the cached file root so
#                            that several conversions of one source can coexist
#   $cache_mode            - optional; forwarded to autorun_general_cmd via
#                            its print-info hash when non-empty
#
# Returns a three element list ($success, $result, $target_file_path):
#   (0, undef, undef)                 conversion unavailable, or the source is
#                                     not a plain file
#   (0, $result, $target_file_path)   autorun_general_cmd reported an error
#   (1, $result, $target_file_path)   otherwise
#
# When caching is disabled, the target path is remembered in
# $self->{'ootmp_file_paths'} so clean_up_temporary_files() can remove it.
sub convert {
    my $self = shift(@_);
    my $source_file_full_path = shift(@_);
    my $target_file_type = shift(@_);
    my $convert_options = shift(@_) || "";
    my $convert_id = shift(@_) || "";
    my $cache_mode = shift(@_) || "";

    return (0,undef,undef) unless $openoffice_conversion_available;
    # check the filename
    return (0,undef,undef) if (!defined $source_file_full_path || !-f $source_file_full_path);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_full_path);

    # Determine the full name and path of the output file
    my $target_file_path;
    if ($self->{'enable_cache'}) {
        $self->init_cache_for_file($source_file_full_path);
        my $cache_dir = $self->{'cached_dir'};
        my $file_root = $self->{'cached_file_root'};
        $file_root .= "_$convert_id" if ($convert_id ne "");
        my $target_file = "$file_root.$target_file_type";
        $target_file_path = &FileUtils::filenameConcatenate($cache_dir,$target_file);
    }
    else {
        $target_file_path = &util::get_timestamped_tmp_filename_in_collection($source_file_full_path, $target_file_type);
        push(@{$self->{'ootmp_file_paths'}}, $target_file_path);
    }

    # Generate and run the convert command

    my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
    #my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"lib","java","jodconverter.jar");
    my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

    # jodjar always assumes that soffice (officeHome) is in
    # "C:\Prog Files\OpenOffice org 3" or "/opt/openoffice.org3" rather than
    # checking for any user-defined SOFFICE_HOME var. Therefore jodjar is run
    # with the -Doffice.home flag explicitly set, so that e.g. a remote
    # Greenstone server on windows can find the office installation.
    my $office_short_path = _office_home_for_jodconverter();

    # The port comes from the plugin option, not from $ENV{'SOFFICE_PORT'}.
    my $convert_cmd = "java -Doffice.home=\"".$office_short_path."\" -jar \"$jodjar\" --port $self->{'openoffice_port'}";
    $convert_cmd .= " \"$source_file_full_path\" \"$target_file_path\"";

    if ($verbosity>2) {
        print $outhandle "Convert command: $convert_cmd\n";
    }

    my $print_info = { 'message_prefix' => "OpenOffice Conversion",
                       'message' => "Converting $source_file_no_path to: $target_file_type" };
    $print_info->{'cache_mode'} = $cache_mode if ($cache_mode ne "");

    my ($regenerated,$result,$had_error)
        = $self->autorun_general_cmd($convert_cmd,$source_file_full_path, $target_file_path,$print_info);
    if ($had_error) {
        return (0, $result,$target_file_path);
    }
    return (1, $result,$target_file_path);
}
299
300
# Convenience wrapper around convert(): same source path, target type,
# options and id, with the cache mode fixed to "without_result".
# Missing (or false) options / id are passed on as empty strings.
# Returns whatever convert() returns.
sub convert_without_result {
    my ($self, $source_file_path, $target_file_type, $convert_options, $convert_id) = @_;

    $convert_options ||= "";
    $convert_id ||= "";

    return $self->convert($source_file_path, $target_file_type,
                          $convert_options, $convert_id, "without_result");
}
312
313
# Unfinished stub (note the XX suffix): it only splits the input filename
# into its parts and does nothing with them. Because the split is the last
# statement, its result (tail name, directory, suffix) is what the sub
# implicitly returns.
#
# Parameters:
#   $output_ext     - not used yet
#   $input_filename - filename to split
#   $textref        - not used yet
sub tmp_area_convert_fileXX {
    my ($self, $output_ext, $input_filename, $textref) = @_;

    # Settings looked up on the object; none of them is used yet.
    my $convert_to_ext = $self->{'convert_to_ext'};
    my $convert_to = $self->{'convert_to'};
    my $outhandle = $self->{'outhandle'};
    my $failhandle = $self->{'failhandle'};

    # derive tmp filename from input filename: the suffix is the final
    # dot followed by one or more non-dot characters
    my ($tailname, $dirname, $suffix)
        = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
}
327
328
# Removes every temporary output file recorded in $self->{'ootmp_file_paths'}
# that still exists, then resets the list to an empty array ref.
# Safe to call when the list was never initialised.
sub clean_up_temporary_files {
    my ($self) = @_;

    my $recorded_paths = $self->{'ootmp_file_paths'} || [];

    foreach my $ootmp_file_path (@$recorded_paths) {
        if (-e $ootmp_file_path) {
            &FileUtils::removeFiles($ootmp_file_path);
        }
    }

    # An empty array reference: assigning () to a hash element stores undef,
    # which is not an empty list.
    $self->{'ootmp_file_paths'} = [];

    return;
}
340
341
342
3431;
Note: See TracBrowser for help on using the repository browser.