source: gs2-extensions/open-office/trunk/src/perllib/plugins/OpenOfficeConverter.pm@ 35415

Last change on this file since 35415 was 35415, checked in by kjdon, 3 years ago

Running soffice in the background on Windows using "start" from OpenOfficeConverter.pm's BEGIN block ended up causing GLI to hang (command-line building didn't hang). This was because the join() calls on the perl process' stdout/stderr in SafeProcess.InputStreamGobblers() hung, as no eof/eos was ever received. soffice, or any other silent external program launched through perl using system(), kept stderr/stdout open even though the external program was supposedly launched in the background. After lots of investigation, Dr Bainbridge found that, unlike the system() command in perl/C/C++, Java programs using Runtime.exec() and WScriptHost/CScriptHost script files (like VBScript) can launch external programs fully in the background, so that this hanging in GLI doesn't occur. Dr Bainbridge decided we should go with a custom VBScript, created in background-launcher.vbs (committed previously, revision 35414), instead of a Java program, because with a script no compiling, setting of JAVA_HOME or updating of PATH is necessary.

  • Property svn:executable set to *
File size: 14.8 KB
Line 
1###########################################################################
2#
3# OpenOfficeConverter - helper plugin that does office document conversion
4# using jodconverter combined with OpenOffice
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2010 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package OpenOfficeConverter;
28
29use ConvertBinaryFile;
30use BaseMediaConverter;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35use gsprintf 'gsprintf';
36
# Availability flag and failure-reason key for OpenOffice conversion.
# They are deliberately declared WITHOUT initial values: the BEGIN block
# below assigns them at compile time, and an initialiser written here would
# execute afterwards (at run time) and leave them stuck at that value.
our ($openoffice_conversion_available, $no_openoffice_conversion_reason);
41
BEGIN {
    @OpenOfficeConverter::ISA = ('BaseMediaConverter');

    # Decide once, at compile time, whether conversion can be offered.
    # Three things are required: the OpenOffice extension (GEXT_OPENOFFICE),
    # the jodconverter jar inside it, and an soffice that can be started.
    # On any failure the flag is cleared and a reason key is recorded.
    $openoffice_conversion_available = 1;
    $no_openoffice_conversion_reason = "";

    if (!defined $ENV{'GEXT_OPENOFFICE'}) {
        $openoffice_conversion_available = 0;
        $no_openoffice_conversion_reason = "gextopenofficenotinstalled";
    }
    else {
        my $jodjar = &FileUtils::filenameConcatenate($ENV{'GEXT_OPENOFFICE'},
                                                     "jodconverter-2.2.2", "lib",
                                                     "jodconverter-cli-2.2.2.jar");

        if (-e $jodjar) {
            # There is no separate JODCONVERTER_PORT any more: jodconverter talks
            # to soffice on SOFFICE_PORT. 8100 is the port jodconverter expects
            # OpenOffice/LibreOffice to listen on by default, so default to that.
            $ENV{'SOFFICE_HOST'} = "localhost" unless defined $ENV{'SOFFICE_HOST'};
            $ENV{'SOFFICE_PORT'} = "8100"      unless defined $ENV{'SOFFICE_PORT'};

            # Basic launch command shared by all platforms.
            my $accept = "socket,host=$ENV{'SOFFICE_HOST'},port=$ENV{'SOFFICE_PORT'};urp;StarOffice.ServiceManager";
            my $cmd    = "soffice \"--accept=$accept\" --headless";
            my $status = 0;

            if ($ENV{'GSDLOS'} =~ m/^windows$/) {
                # SOFFICE_HOME matters when the Greenstone server and the office
                # suite live on a remote Windows machine; guess it if unset.
                unless (defined $ENV{'SOFFICE_HOME'}) {
                    $ENV{'SOFFICE_HOME'} = &FileUtils::filenameConcatenate($ENV{'ProgramFiles'}, "OpenOffice.org 3");
                }
                my $ooffice_dir_guess = &FileUtils::filenameConcatenate($ENV{'SOFFICE_HOME'}, "program");
                &util::envvar_append("PATH", $ooffice_dir_guess) if (-d $ooffice_dir_guess);

                # Quietly check that soffice can be found first, otherwise
                # Windows displays a popup (which.exe should be in bin/windows).
                $status = system("which soffice >nul 2>&1");

                if ($status == 0) {
                    # Why not simply: start "soffice process" $cmd >nul 2>&1 ?
                    #
                    # On Windows, a process started in the background through
                    # system() (perl, C or C++) with 'start "window title" cmd'
                    # still *appears* to share the parent perl's stdout/stderr.
                    # That is harmless on the command line, but under GLI's
                    # SafeProcess the join() calls on perl's stdout/stderr block
                    # in InputStreamGobbler.readLine() because no eof/eos ever
                    # arrives while the silent grandchild (soffice, Notepad, ...)
                    # is alive, so GLI hangs until that program is terminated.
                    #
                    # Programs launched with Java's Runtime.exec(), or from a
                    # WScript/CScript file (vbs or js) via the Run method, do not
                    # show this problem. A custom vbs launcher is used rather
                    # than a Java one so that nothing has to be compiled or set
                    # up for Java; the perl building scripts are also run
                    # directly from the command line, not just from GLI.
                    $cmd = "CScript //Nologo $ENV{'GSDLHOME'}\\bin\\windows\\background-launcher.vbs $cmd";
                }
                # Otherwise $status is non-zero: nothing is launched below and
                # conversion is reported as unavailable.
            }
            else {
                # On Unix the process has to be backgrounded explicitly with '&'.
                #
                # IMPORTANT: the file/null redirect must come BEFORE 2>&1, i.e.
                #   $some_cmd >/dev/null 2>&1     (or  $some_cmd >out.txt 2>&1)
                # With the order reversed, SafeProcess blocks forever reading the
                # stdout of the process running pluginfo when GLI starts up,
                # because that stream never receives any data or eof/eos.
                $cmd .= " >/dev/null 2>&1 &";
            }

            # Only attempt the launch if the (Windows-only) probe succeeded.
            if ($status == 0) {
                $status = system($cmd);
            }

            unless ($status == 0) {
                $openoffice_conversion_available = 0;
                $no_openoffice_conversion_reason = "openofficenotinstalled";
            }
        }
        else {
            $openoffice_conversion_available = 0;
            $no_openoffice_conversion_reason = "gextjodconverternotinstalled";
        }
    }
}
157
# Argument descriptors this converter contributes; new() appends them to the
# caller's "ArgList".
# NOTE(review): the lower bound in 'range' ("81,") looks odd next to the
# default of 8100 - confirm it is intentional.
my $arguments = [
    {
        'name'  => 'openoffice_port',
        'desc'  => '{OpenOfficeConverter.openoffice_port}',
        'type'  => 'int',
        'deft'  => '8100',
        'range' => '81,',
        'reqd'  => 'no',
    },
];

# Option block describing this (abstract) plugin; new() pushes it onto the
# caller's "OptList".
my $options = {
    'name'     => 'OpenOfficeConverter',
    'desc'     => '{OpenOfficeConverter.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
173
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through to the BaseMediaConverter constructor
#   $hashArgOptLists - hash ref; this plugin's arguments are appended to its
#                      "ArgList" and its option block to its "OptList"
#   $auxilary        - passed through to the BaseMediaConverter constructor
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
# Unless the object is in 'info_only' mode, the compile-time availability
# result is copied onto it, and a message is printed to the object's
# 'outhandle' when conversion is not available.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists,$auxilary) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit class-method call: the indirect-object form
    # "new BaseMediaConverter(...)" is parse-ambiguous, all the more so inside
    # a package that defines its own new().
    my $self = BaseMediaConverter->new($pluginlist, $inputargs,
                                       $hashArgOptLists, $auxilary);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    if (!$openoffice_conversion_available) {
        $self->{'no_openoffice_conversion_reason'} = $no_openoffice_conversion_reason;

        my $outhandle = $self->{'outhandle'};
        &gsprintf($outhandle, "OpenOfficeConverter: {OpenOfficeConverter.noconversionavailable} ({OpenOfficeConverter.$no_openoffice_conversion_reason})\n");
    }

    $self->{'openoffice_conversion_available'} = $openoffice_conversion_available;

    return bless $self, $class;
}
201
# Per-run initialisation.
#
# Arguments: ($verbosity, $outhandle, $failhandle) are accepted but not used
# in this method.
#
# When conversion is available, an soffice launch command built from the
# plugin's 'openoffice_port' is stored in $self->{'openoffice_launch_cmd'};
# nothing else in this file reads that value. The list of temporary output
# files created by convert() is reset to an empty array.
sub init {
    my $self = shift(@_);
    my ($verbosity, $outhandle, $failhandle) = @_;

    if ($openoffice_conversion_available) {
        my $oo_port = $self->{'openoffice_port'};

        my $launch_cmd = "soffice";
        $launch_cmd .= " \"--accept=socket,host=localhost,port=$oo_port;urp;StarOffice.ServiceManager\"";
        $launch_cmd .= " --headless";
        $self->{'openoffice_launch_cmd'} = $launch_cmd;
    }

    # Must be an array REFERENCE: assigning () to a hash slot stores undef,
    # not an empty list.
    $self->{'ootmp_file_paths'} = [];

    return;
}
218
# Tear-down hook: delegates to clean_up_temporary_files() and hands back
# whatever that method returns.
sub deinit {
    my ($self) = @_;

    return $self->clean_up_temporary_files();
}
224
225
# Convert one document with jodconverter.
#
# Arguments:
#   $source_file_full_path - file to convert; must be an existing plain file
#   $target_file_type      - extension/type of the output (e.g. used as the
#                            suffix of the generated file name)
#   $convert_options       - optional; accepted but not used in this method
#   $convert_id            - optional; appended to the cached file root
#   $cache_mode            - optional; passed on in the print-info hash
#
# Returns a three-element list: (success flag, result, target file path).
#   (0, undef, undef) when conversion is unavailable or the source is not a file;
#   (0, $result, $path) when the conversion command reported an error;
#   (1, $result, $path) otherwise.
sub convert {
    my $self = shift(@_);
    my $source_file_full_path = shift(@_);
    my $target_file_type      = shift(@_);
    my $convert_options       = shift(@_) || "";
    my $convert_id            = shift(@_) || "";
    my $cache_mode            = shift(@_) || "";

    return (0,undef,undef) unless $openoffice_conversion_available;
    # check the filename
    return (0,undef,undef) if ( !-f $source_file_full_path);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_full_path);

    # Determine the full name and path of the output file: either inside the
    # cache directory, or a timestamped temporary file that is remembered so
    # clean_up_temporary_files() can delete it later.
    my $target_file_path;
    if ($self->{'enable_cache'}) {
        $self->init_cache_for_file($source_file_full_path);
        my $cache_dir = $self->{'cached_dir'};
        my $file_root = $self->{'cached_file_root'};
        $file_root .= "_$convert_id" if ($convert_id ne "");
        my $target_file = "$file_root.$target_file_type";
        $target_file_path = &FileUtils::filenameConcatenate($cache_dir,$target_file);
    }
    else {
        $target_file_path = &util::get_timestamped_tmp_filename_in_collection($source_file_full_path, $target_file_type);
        push(@{$self->{'ootmp_file_paths'}}, $target_file_path);
    }

    # Generate and run the convert command

    my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
    my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

    # Work out the office installation directory handed to jodconverter via
    # -Doffice.home. This is needed (e.g. with a remote GS server on Windows)
    # when SOFFICE_HOME\program was not added to the PATH.
    my $on_windows = ($ENV{'GSDLOS'} =~ m/^windows$/);
    my $office_short_path;
    if (defined $ENV{'SOFFICE_HOME'}) { # points to libreoffice or openoffice, since soffice launches both
        $office_short_path = $ENV{'SOFFICE_HOME'};
        if ($on_windows) {
            # GetShortPathName yields undef for a path that does not exist
            # (SOFFICE_HOME may only be a guessed default); keep the long
            # path then instead of passing an empty office.home.
            my $short_form = Win32::GetShortPathName($ENV{'SOFFICE_HOME'});
            if (defined $short_form && $short_form ne "") {
                $office_short_path = $short_form;
            }
        }
    }
    elsif ($on_windows) { # SOFFICE_HOME not user-defined, try the Windows default
        $office_short_path = "C:\\PROGRA~1\\OPENOF~1.ORG";
    }
    elsif (-d "/usr/lib/libreoffice") { # try libreoffice first
        $office_short_path = "/usr/lib/libreoffice";
    }
    elsif (-d "/usr/lib/openoffice") { # fallback
        $office_short_path = "/usr/lib/openoffice";
    }
    elsif (-d "/usr/lib64/openoffice.org3") { # on 64 bit linux such as CentOS
        $office_short_path = "/usr/lib64/openoffice.org3";
    }
    else { # default linux officeHome which jodconverter tries for
        $office_short_path = "/opt/openoffice.org3";
    }

    # jodconverter assumes soffice (officeHome) is in its own built-in default
    # location rather than checking any user-defined SOFFICE_HOME, so it is
    # always run with -Doffice.home set explicitly. The port comes from the
    # plugin's openoffice_port option (not from $ENV{'SOFFICE_PORT'}).
    my $convert_cmd = "java -Doffice.home=\"".$office_short_path."\" -jar \"$jodjar\" --port $self->{'openoffice_port'}";
    $convert_cmd .= " \"$source_file_full_path\" \"$target_file_path\"";

    if ($verbosity>2) {
        print $outhandle "Convert command: $convert_cmd\n";
    }

    my $print_info = { 'message_prefix' => "OpenOffice Conversion",
                       'message' => "Converting $source_file_no_path to: $target_file_type" };
    $print_info->{'cache_mode'} = $cache_mode if ($cache_mode ne "");

    # autorun_general_cmd is not defined in this file (presumably inherited
    # from BaseMediaConverter).
    my ($regenerated,$result,$had_error)
        = $self->autorun_general_cmd($convert_cmd,$source_file_full_path, $target_file_path,$print_info);

    if ($had_error) {
        return (0, $result,$target_file_path);
    }
    return (1, $result,$target_file_path);
}
323
324
# Same as convert(), but always passes "without_result" as the cache mode.
# $convert_options and $convert_id are optional and default to "".
# Returns whatever convert() returns.
sub convert_without_result {
    my ($self, $source_file_path, $target_file_type, $convert_options, $convert_id) = @_;

    $convert_options ||= "";
    $convert_id      ||= "";

    return $self->convert($source_file_path, $target_file_type,
                          $convert_options, $convert_id, "without_result");
}
336
337
# Apparently unfinished helper: it reads a few settings off the object and
# splits $input_filename into (tail name, directory, suffix), where the suffix
# is the final ".ext" component. Nothing is done with those values apart from
# the split being the last statement evaluated, i.e. the implicit return value.
sub tmp_area_convert_fileXX {
    my ($self, $output_ext, $input_filename, $textref) = @_;

    my ($outhandle, $convert_to, $failhandle, $convert_to_ext)
        = @{$self}{'outhandle', 'convert_to', 'failhandle', 'convert_to_ext'};

    # derive tmp filename from input filename
    my ($tailname, $dirname, $suffix)
        = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
}
351
352
# Delete every temporary output file recorded in $self->{'ootmp_file_paths'}
# that still exists, then reset the list to an empty array.
# Safe to call when the list was never initialised.
sub clean_up_temporary_files {
    my $self = shift(@_);

    foreach my $ootmp_file_path (@{ $self->{'ootmp_file_paths'} || [] }) {
        if (-e $ootmp_file_path) {
            &FileUtils::removeFiles($ootmp_file_path);
        }
    }

    # Must be an array REFERENCE: assigning () to a hash slot stores undef,
    # not an empty list.
    $self->{'ootmp_file_paths'} = [];

    return;
}
364
365
366
3671;
Note: See TracBrowser for help on using the repository browser.