source: gs2-extensions/open-office/trunk/src/perllib/plugins/OpenOfficeConverter.pm@ 35453

Last change on this file since 35453 was 35453, checked in by anupama, 3 years ago

Using the newly added util subroutine get_first_existing_dir(list_of_dirs), just committed prior to this, to search through all known default locations where openoffice and libre_office may be installed for the first directory that may exist. This is to further reduce the likelihood of having to set SOFFICE_HOME. Was found necessary on this test machine where LibreOffice instead of OpenOffice is installed and not in the default location of OpenOffice.

  • Property svn:executable set to *
File size: 15.6 KB
Line 
1###########################################################################
2#
3# OpenOfficeConverter - helper plugin that does office document conversion
4# using jodconverter combined with OpenOffice
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2010 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package OpenOfficeConverter;
28
29use ConvertBinaryFile;
30use BaseMediaConverter;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35use gsprintf 'gsprintf';
36
37# these two variables mustn't be initialised here or they will get stuck
38# at those values.
39our $openoffice_conversion_available;
40our $no_openoffice_conversion_reason;
41
BEGIN {
    @OpenOfficeConverter::ISA = ('BaseMediaConverter');

    # Decide, at compile time, whether OpenOffice/LibreOffice based conversion
    # can be offered.  The outcome is recorded in the two package variables
    # $openoffice_conversion_available and $no_openoffice_conversion_reason,
    # which new() later copies onto each plugin instance.
    #
    # Steps, each of which can veto conversion:
    #   1. the GEXT_OPENOFFICE environment variable must be defined;
    #   2. the jodconverter CLI jar must exist below it;
    #   3. soffice must be launchable, listening on SOFFICE_HOST:SOFFICE_PORT.
    $openoffice_conversion_available = 1;
    $no_openoffice_conversion_reason = "";

    if (!defined $ENV{'GEXT_OPENOFFICE'}) {
        $openoffice_conversion_available = 0;
        $no_openoffice_conversion_reason = "gextopenofficenotinstalled";
    }
    else {
        my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
        my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

        if (!-e $jodjar) {
            $openoffice_conversion_available = 0;
            $no_openoffice_conversion_reason = "gextjodconverternotinstalled";
        }
        else {
            # Fill in the defaults for where soffice should listen.
            # There is no separate JODCONVERTER_PORT: jodconverter talks to
            # soffice on SOFFICE_PORT, and 8100 is the port jodconverter
            # expects by default.
            if (!defined $ENV{'SOFFICE_HOST'}) {
                $ENV{'SOFFICE_HOST'} = "localhost";
            }
            if (!defined $ENV{'SOFFICE_PORT'}) {
                $ENV{'SOFFICE_PORT'} = "8100";
            }

            # Basic launch command shared by all platforms.
            my $cmd = "soffice \"--accept=socket,host=$ENV{'SOFFICE_HOST'},port=$ENV{'SOFFICE_PORT'};urp;StarOffice.ServiceManager\" --headless";
            my $status = 0;

            # Guard against GSDLOS being unset so the match never sees undef.
            my $on_windows = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/) ? 1 : 0;

            if ($on_windows) {
                # SOFFICE_HOME matters when the Greenstone server and the
                # office suite live on a remote Windows machine.  If the user
                # has not set it, probe the usual install locations of
                # OpenOffice and LibreOffice, in that order.
                if (!defined $ENV{'SOFFICE_HOME'}) {
                    # Only build candidates from base directories that are
                    # actually defined: ProgramFiles(X86) does not exist on
                    # 32 bit Windows.  (Note that under a 32 bit parent
                    # process such as GLI's Java, both variables have been
                    # seen to hold "C:\Program Files (x86)".)
                    my @candidate_dirs = ();
                    foreach my $product ("OpenOffice.org 3", "LibreOffice") {
                        foreach my $base_var ('ProgramFiles', 'ProgramFiles(X86)') {
                            next if (!defined $ENV{$base_var});
                            push(@candidate_dirs,
                                 &FileUtils::filenameConcatenate($ENV{$base_var}, $product));
                        }
                    }

                    if (scalar(@candidate_dirs) > 0) {
                        my $found_dir = &util::get_first_existing_dir(@candidate_dirs);
                        # Never store an undefined/empty value in %ENV: leave
                        # SOFFICE_HOME unset when nothing was found.
                        if (defined $found_dir && $found_dir ne "") {
                            $ENV{'SOFFICE_HOME'} = $found_dir;
                        }
                    }
                }

                if (defined $ENV{'SOFFICE_HOME'}) {
                    my $ooffice_dir_guess =
                        &FileUtils::filenameConcatenate($ENV{'SOFFICE_HOME'},"program");
                    if (-d $ooffice_dir_guess) {
                        &util::envvar_append("PATH",$ooffice_dir_guess);
                    }
                }

                # Silently check that soffice can be found first, otherwise
                # Windows displays a popup.  which.exe should be in bin/windows.
                $status = system("which soffice >nul 2>&1");
                if ($status == 0) {
                    # Do not launch via system("start ..."): a process started
                    # that way appears to keep perl's stdout/stderr open, and
                    # GLI's SafeProcess then blocks reading those streams until
                    # soffice terminates.  A WScript launcher (like Java's
                    # Runtime.exec()) does not show this problem, and unlike a
                    # Java launcher needs no compilation step, so the vbs
                    # script is used to start soffice in the background.
                    #
                    # The script path is quoted so that a GSDLHOME containing
                    # spaces is passed to CScript as a single argument.
                    $cmd = "CScript //Nologo \"$ENV{'GSDLHOME'}\\bin\\windows\\background-launcher.vbs\" $cmd";
                }
                # else: $status is non-zero, so nothing is launched below and
                # conversion is reported as unavailable.
            }
            else {
                # On Unix the process must be put in the background explicitly
                # by appending '&'.
                #
                # IMPORTANT: the stdout redirect has to come BEFORE 2>&1
                # (i.e. "cmd >/dev/null 2>&1").  In the other order
                # SafeProcess blocks forever reading the stdout of the process
                # running pluginfo, as no data or eof ever arrives.
                $cmd .= " >/dev/null 2>&1 &";
            }

            # Only attempt the launch if the Windows pre-check (if any) passed.
            $status = system($cmd) if ($status == 0);
            if ($status != 0) {
                $openoffice_conversion_available = 0;
                $no_openoffice_conversion_reason = "openofficenotinstalled";
            }
        }
    }
}
171
# Arguments contributed by this plugin; new() pushes them onto the shared
# "ArgList".  The 'desc' values are resource keys in curly braces rather than
# literal text.
# NOTE(review): 'range' of "81," looks like a lower bound only - confirm
# against the option parser before changing it.
my $arguments = [
    {
        'name'  => 'openoffice_port',
        'desc'  => '{OpenOfficeConverter.openoffice_port}',
        'type'  => 'int',
        'deft'  => '8100',
        'range' => '81,',
        'reqd'  => 'no',
    },
];


# Plugin description record; new() pushes it onto the shared "OptList".
my $options = {
    'name'     => 'OpenOfficeConverter',
    'desc'     => '{OpenOfficeConverter.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
187
# Constructor.
#
# Arguments (after the class name): $pluginlist, $inputargs,
# $hashArgOptLists and $auxilary, all handed on unchanged to the
# BaseMediaConverter constructor after this class has registered itself:
#   - the class name is appended to @$pluginlist;
#   - this plugin's arguments/options are appended to the "ArgList" and
#     "OptList" entries of $hashArgOptLists.
#
# Returns the object blessed into $class.  Unless the object is flagged
# 'info_only', the compile-time availability result is copied onto it, and a
# message is printed to its 'outhandle' when conversion is unavailable.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists,$auxilary) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit class-method call: the indirect object form
    # "new BaseMediaConverter(...)" is ambiguous to the parser, all the more
    # so inside a package that defines its own sub new.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs,
                                       $hashArgOptLists, $auxilary);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    if (!$openoffice_conversion_available) {
        $self->{'no_openoffice_conversion_reason'} = $no_openoffice_conversion_reason;

        my $outhandle = $self->{'outhandle'};
        &gsprintf($outhandle, "OpenOfficeConverter: {OpenOfficeConverter.noconversionavailable} ({OpenOfficeConverter.$no_openoffice_conversion_reason})\n");
    }

    $self->{'openoffice_conversion_available'} = $openoffice_conversion_available;

    return bless $self, $class;
}
215
# Per-run initialisation.
#
# Called with ($verbosity, $outhandle, $failhandle); none of these are used
# here.  When conversion is available, the soffice launch command for the
# configured 'openoffice_port' is stored in $self->{'openoffice_launch_cmd'}
# (nothing in this file reads it back - it seems to be entirely unused).
# The list of temporary output files is reset to an empty array reference.
sub init {
    my $self = shift(@_);

    if ($openoffice_conversion_available) {
        my $oo_port = $self->{'openoffice_port'};

        my $launch_cmd = "soffice";
        $launch_cmd .= " \"--accept=socket,host=localhost,port=$oo_port;urp;StarOffice.ServiceManager\"";
        $launch_cmd .= " --headless";
        $self->{'openoffice_launch_cmd'} = $launch_cmd;
    }

    # Must be an array *reference*: assigning () to a scalar slot stores
    # undef and only worked through autovivification on first use.
    $self->{'ootmp_file_paths'} = [];

    return;
}
232
# Counterpart of init(): removes whatever temporary files are still
# recorded on this object.
sub deinit {
    my ($self) = @_;

    return $self->clean_up_temporary_files();
}
238
239
# Convert one document with jodconverter.
#
# Arguments:
#   $source_file_full_path - file to convert; must be an existing plain file
#   $target_file_type      - extension of the wanted output, used as the
#                            target file's suffix
#   $convert_options       - optional; accepted but not used by this method
#   $convert_id            - optional; when non-empty and caching is enabled
#                            it is appended to the cached file root
#   $cache_mode            - optional; passed on in the print_info hash
#
# Returns a three element list ($ok, $result, $target_file_path):
#   (0, undef, undef) when conversion is unavailable or the source is not a
#   file; otherwise $ok is 0 if autorun_general_cmd() reported an error and
#   1 if it did not.
sub convert {
    my $self = shift(@_);
    my $source_file_full_path = shift(@_);
    my $target_file_type = shift(@_);
    my $convert_options = shift(@_) || "";
    my $convert_id = shift(@_) || "";
    my $cache_mode = shift(@_) || "";

    return (0,undef,undef) unless $openoffice_conversion_available;
    # check the filename
    return (0,undef,undef) if ( !-f $source_file_full_path);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_full_path);

    # Determine the full name and path of the output file: either inside the
    # cache, or a timestamped temporary file that is remembered for clean-up.
    my $target_file_path;
    if ($self->{'enable_cache'}) {
        $self->init_cache_for_file($source_file_full_path);
        my $cache_dir = $self->{'cached_dir'};
        my $file_root = $self->{'cached_file_root'};
        $file_root .= "_$convert_id" if ($convert_id ne "");
        my $target_file = "$file_root.$target_file_type";
        $target_file_path = &FileUtils::filenameConcatenate($cache_dir,$target_file);
    }
    else {
        $target_file_path = &util::get_timestamped_tmp_filename_in_collection($source_file_full_path, $target_file_type);
        push(@{$self->{'ootmp_file_paths'}}, $target_file_path);
    }

    # Generate and run the convert command

    my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
    my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

    # Guard against GSDLOS being unset so the match never sees undef.
    my $on_windows = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/) ? 1 : 0;

    # Work out the office installation directory handed to jodconverter via
    # -Doffice.home.  This is needed because jodconverter otherwise only
    # tries its own built-in default locations and ignores SOFFICE_HOME.
    my $office_short_path;
    if (defined $ENV{'SOFFICE_HOME'}) { # points to libreoffice or openoffice, since soffice launches both
        $office_short_path = $ENV{'SOFFICE_HOME'};
        if ($on_windows) {
            # GetShortPathName() returns undef for a path that does not
            # exist; keep the long path in that case instead of passing an
            # empty office.home.
            my $short_form = Win32::GetShortPathName($ENV{'SOFFICE_HOME'});
            if (defined $short_form && $short_form ne "") {
                $office_short_path = $short_form;
            }
        }
    }
    elsif ($on_windows) { # SOFFICE_HOME not user-defined, try the default
        $office_short_path = "C:\\PROGRA~1\\OPENOF~1.ORG";
    }
    elsif (-d "/usr/lib/libreoffice") { # try libreoffice first
        $office_short_path = "/usr/lib/libreoffice";
    }
    elsif (-d "/usr/lib/openoffice") { # fallback
        $office_short_path = "/usr/lib/openoffice";
    }
    elsif (-d "/usr/lib64/openoffice.org3") { # on 64 bit linux such as CentOS
        $office_short_path = "/usr/lib64/openoffice.org3";
    }
    else { # default linux officeHome which jodconverter tries for
        $office_short_path = "/opt/openoffice.org3";
    }

    # The port comes from the plugin's openoffice_port option rather than
    # from the SOFFICE_PORT environment variable.
    my $convert_cmd = "java -Doffice.home=\"".$office_short_path."\" -jar \"$jodjar\" --port $self->{'openoffice_port'}";
    $convert_cmd .= " \"$source_file_full_path\" \"$target_file_path\"";

    if ($verbosity>2) {
        print $outhandle "Convert command: $convert_cmd\n";
    }

    my $print_info = { 'message_prefix' => "OpenOffice Conversion",
                       'message' => "Converting $source_file_no_path to: $target_file_type" };
    $print_info->{'cache_mode'} = $cache_mode if ($cache_mode ne "");

    my ($regenerated,$result,$had_error)
        = $self->autorun_general_cmd($convert_cmd,$source_file_full_path, $target_file_path,$print_info);
    if ($had_error) {
        return (0, $result,$target_file_path);
    }
    return (1, $result,$target_file_path);
}
337
338
# Thin wrapper around convert() that always requests the "without_result"
# cache mode.  $convert_options and $convert_id are optional and default to
# the empty string; the return value is whatever convert() returns.
sub convert_without_result {
    my ($self, $source_file_path, $target_file_type, $options_in, $id_in) = @_;

    my $convert_options = $options_in || "";
    my $convert_id      = $id_in || "";

    return $self->convert($source_file_path, $target_file_type,
                          $convert_options, $convert_id, "without_result");
}
350
351
# NOTE(review): this looks like an unfinished stub - it reads a few fields
# off $self, splits $input_filename into its parts and then stops.  Because
# the split is the last statement, its result is what the sub returns.
#
# Arguments: ($output_ext, $input_filename, $textref); only $input_filename
# is actually used.
sub tmp_area_convert_fileXX {
    my ($self, $output_ext, $input_filename, $textref) = @_;

    my ($outhandle, $convert_to, $failhandle, $convert_to_ext)
        = @{$self}{'outhandle', 'convert_to', 'failhandle', 'convert_to_ext'};

    # derive the name parts from the input filename; the suffix pattern
    # matches the final ".ext" component
    my ($file_stem, $parent_dir, $extension)
        = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
}
365
366
# Remove every temporary output file recorded in $self->{'ootmp_file_paths'}
# that still exists on disk, then reset the list to an empty array
# reference.  Safe to call when the list was never initialised.
sub clean_up_temporary_files {
    my $self = shift(@_);

    my $recorded_paths = $self->{'ootmp_file_paths'};
    if (ref($recorded_paths) eq 'ARRAY') {
        foreach my $ootmp_file_path (@$recorded_paths) {
            if (-e $ootmp_file_path) {
                &FileUtils::removeFiles($ootmp_file_path);
            }
        }
    }

    # Must be an array *reference*: assigning () to a scalar slot stores
    # undef rather than an empty list.
    $self->{'ootmp_file_paths'} = [];

    return;
}
378
379
380
3811;
Note: See TracBrowser for help on using the repository browser.