source: gs2-extensions/open-office/trunk/src/perllib/plugins/OpenOfficeConverter.pm@ 32317

Last change on this file since 32317 was 32317, checked in by ak19, 6 years ago

Basic updates to the OpenOffice extension: 1. BasePlugin is now BaseImporter. 2. Warnings reported that the accept and headless arguments now take a double-minus prefix instead of a single-minus prefix. Two bugs are still unresolved. First, GLI can get stuck when launching if the open office ext is included; this has been hard to track down so far, but this ext's jodconverter jar also gets stuck when run from the cmdline on these occasions. Second, I haven't resolved how to get jodconverter to convert (word) docs that contain images to HTML without always embedding the images inline as base64. jodconverter doesn't take the same names for options as the soffice commands it calls. Jodconverter's cmdline usage display is either not complete or the options are very limited.

  • Property svn:executable set to *
File size: 12.3 KB
Line 
1###########################################################################
2#
3# OpenOfficeConverter - helper plugin that does office document conversion
4# using jodconverter combined with OpenOffice
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2010 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package OpenOfficeConverter;
28
29use ConvertBinaryFile;
30use BaseMediaConverter;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35use gsprintf 'gsprintf';
36
37# these two variables mustn't be initialised here or they will get stuck
38# at those values.
39our $openoffice_conversion_available;
40our $no_openoffice_conversion_reason;
41
BEGIN {
    @OpenOfficeConverter::ISA = ('BaseMediaConverter');

    # Start optimistic; each failed probe below switches conversion off
    # and records the reason key that new() reports to the user.
    $openoffice_conversion_available = 1;
    $no_openoffice_conversion_reason = "";

  PROBE: {
        # 1. The OpenOffice extension itself must be set up.
        unless (defined $ENV{'GEXT_OPENOFFICE'}) {
            $openoffice_conversion_available = 0;
            $no_openoffice_conversion_reason = "gextopenofficenotinstalled";
            last PROBE;
        }

        # 2. The jodconverter command-line jar must exist inside the extension.
        my $jod_cli_jar = &FileUtils::filenameConcatenate(
            $ENV{'GEXT_OPENOFFICE'},
            "jodconverter-2.2.2", "lib", "jodconverter-cli-2.2.2.jar");
        unless (-e $jod_cli_jar) {
            $openoffice_conversion_available = 0;
            $no_openoffice_conversion_reason = "gextjodconverternotinstalled";
            last PROBE;
        }

        # 3. Try to start soffice listening on a socket. There is no separate
        # jodconverter port variable: jodconverter talks to soffice on
        # SOFFICE_PORT, and 8100 is the port jodconverter expects by default.
        $ENV{'SOFFICE_HOST'} = "localhost" unless defined $ENV{'SOFFICE_HOST'};
        $ENV{'SOFFICE_PORT'} = "8100"      unless defined $ENV{'SOFFICE_PORT'};

        my $accept_arg = "\"--accept=socket,host=$ENV{'SOFFICE_HOST'},port=$ENV{'SOFFICE_PORT'};urp;StarOffice.ServiceManager\"";
        my $launch     = "soffice $accept_arg --headless 2>&1"; # linux and mac form
        my $exit_code  = 0;

        if ($ENV{'GSDLOS'} =~ m/^windows$/) {
            # Important when the Greenstone server and open office live on a
            # remote windows machine: make sure SOFFICE_HOME is set and that
            # its "program" folder (if present) is on the PATH.
            unless (defined $ENV{'SOFFICE_HOME'}) {
                $ENV{'SOFFICE_HOME'} = &FileUtils::filenameConcatenate($ENV{'ProgramFiles'}, "OpenOffice.org 3");
            }
            my $program_dir = &FileUtils::filenameConcatenate($ENV{'SOFFICE_HOME'}, "program");
            &util::envvar_append("PATH", $program_dir) if (-d $program_dir);

            # Silently check that soffice exists first, otherwise windows
            # displays a popup. (which.exe should be in bin/windows.)
            $exit_code = system("which soffice >nul 2>&1");
            if ($exit_code == 0) {
                # Launch through "start" so the prompt returns; otherwise
                # client-gli tends to hang waiting on the started process.
                $launch = "start \"soffice process\" soffice $accept_arg --headless 2>&1 >nul";
            }
            # Otherwise $exit_code is non-zero and the launch below is skipped.
        }
        else {
            # On Unix the process has to be put in the background explicitly.
            $launch .= " >/dev/null &";
        }

        $exit_code = system($launch) if ($exit_code == 0);
        if ($exit_code != 0) {
            $openoffice_conversion_available = 0;
            $no_openoffice_conversion_reason = "openofficenotinstalled";
        }
    }
}
127
# Plugin arguments contributed by this converter; new() appends them to the
# shared "ArgList".
my $arguments = [
    {
        'name'  => "openoffice_port",
        'desc'  => "{OpenOfficeConverter.openoffice_port}",
        'type'  => "int",
        'deft'  => "8100",
        'range' => "81,",
        'reqd'  => "no",
    },
];


# Plugin description record; new() appends it to the shared "OptList".
my $options = {
    'name'     => "OpenOfficeConverter",
    'desc'     => "{OpenOfficeConverter.desc}",
    'abstract' => "yes",
    'inherits' => "yes",
    'args'     => $arguments,
};
143
# Constructor.
#
# Arguments: the class name, followed by ($pluginlist, $inputargs,
# $hashArgOptLists, $auxilary), which are handed on to the
# BaseMediaConverter constructor after this class has registered itself
# in $pluginlist and added its own entries to the "ArgList" and "OptList"
# lists of $hashArgOptLists.
#
# Returns the object blessed into $class. When the object is in
# 'info_only' mode it is returned straight away. Otherwise the outcome of
# the load-time availability probe is copied onto the object and, if
# conversion is unavailable, the reason is printed to the object's
# 'outhandle'.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists,$auxilary) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit arrow call rather than indirect-object syntax
    # ("new BaseMediaConverter(...)"), whose parse depends on what perl
    # already knows about the names involved.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs,
                                       $hashArgOptLists, $auxilary);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    if (!$openoffice_conversion_available) {
        $self->{'no_openoffice_conversion_reason'} = $no_openoffice_conversion_reason;

        my $outhandle = $self->{'outhandle'};
        &gsprintf($outhandle, "OpenOfficeConverter: {OpenOfficeConverter.noconversionavailable} ({OpenOfficeConverter.$no_openoffice_conversion_reason})\n");
    }

    $self->{'openoffice_conversion_available'} = $openoffice_conversion_available;

    return bless $self, $class;
}
171
172# launch_cmd variable seems to be entirely unused
# Per-run initialisation.
#
# Arguments: ($verbosity, $outhandle, $failhandle) - accepted but not used
# by this implementation.
#
# When conversion is available, builds an soffice launch command from the
# object's 'openoffice_port' setting and stores it in
# $self->{'openoffice_launch_cmd'} (nothing in this file reads it back).
# Always resets the list of temporary output files to an empty array.
# Returns nothing.
sub init {
    my $self = shift(@_);
    my ($verbosity, $outhandle, $failhandle) = @_;

    if ($openoffice_conversion_available) {
        my $oo_port = $self->{'openoffice_port'};

        my $launch_cmd = "soffice";
        $launch_cmd .= " \"--accept=socket,host=localhost,port=$oo_port;urp;StarOffice.ServiceManager\"";
        $launch_cmd .= " --headless";
        $self->{'openoffice_launch_cmd'} = $launch_cmd;
    }

    # An empty array *reference*: assigning "()" to a hash slot is a scalar
    # assignment and would store undef instead of an empty list.
    $self->{'ootmp_file_paths'} = [];

    return;
}
188
# Teardown hook: removes whatever temporary conversion output is still
# recorded on the object.
sub deinit {
    my ($self) = @_;

    return $self->clean_up_temporary_files();
}
194
195
# Convert one document to another format by running the jodconverter
# command-line jar, which talks to an soffice process on the object's
# 'openoffice_port'.
#
# Arguments (positional):
#   $source_file_full_path - path of the document to convert
#   $target_file_type      - used as the extension of the output file
#   $convert_options       - accepted for interface compatibility; unused
#   $convert_id            - optional; when non-empty it is appended to the
#                            cached file root as "_<id>"
#   $cache_mode            - optional; when non-empty it is passed on to
#                            autorun_general_cmd inside the print-info hash
#
# Returns a three-element list (status, result, target_file_path):
#   (0, undef, undef)   when conversion is unavailable or the source is not
#                       a plain file
#   (0, $result, $path) when autorun_general_cmd reports an error
#   (1, $result, $path) otherwise
#
# With caching disabled the output goes to a timestamped temporary file,
# which is remembered in $self->{'ootmp_file_paths'} for later clean-up.
sub convert {
    my $self = shift(@_);
    my $source_file_full_path = shift(@_);
    my $target_file_type      = shift(@_);
    my $convert_options       = shift(@_) || "";
    my $convert_id            = shift(@_) || "";
    my $cache_mode            = shift(@_) || "";

    return (0,undef,undef) unless $openoffice_conversion_available;
    # check the filename
    return (0,undef,undef) if ( !-f $source_file_full_path);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_full_path);

    # Determine the full name and path of the output file
    my $target_file_path;
    if ($self->{'enable_cache'}) {
        $self->init_cache_for_file($source_file_full_path);
        my $cache_dir = $self->{'cached_dir'};
        my $file_root = $self->{'cached_file_root'};
        $file_root .= "_$convert_id" if ($convert_id ne "");
        my $target_file = "$file_root.$target_file_type";
        $target_file_path = &FileUtils::filenameConcatenate($cache_dir,$target_file);
    }
    else {
        $target_file_path = &util::get_timestamped_tmp_filename_in_collection($source_file_full_path, $target_file_type);
        push(@{$self->{'ootmp_file_paths'}}, $target_file_path);
    }

    # Generate and run the convert command

    my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
    my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

    # jodconverter assumes a fixed office home ("C:\Prog Files\OpenOffice org 3"
    # or "/opt/openoffice.org3") rather than honouring a user-defined
    # SOFFICE_HOME, so the office home is always passed explicitly through
    # -Doffice.home. This also lets jodconverter find the installation when
    # SOFFICE_HOME\program was not added to the PATH.
    my $on_windows = ($ENV{'GSDLOS'} =~ m/^windows$/);
    my $office_short_path;
    if (defined $ENV{'SOFFICE_HOME'}) { # points to libreoffice or openoffice, since soffice launches both
        if ($on_windows) {
            $office_short_path = Win32::GetShortPathName($ENV{'SOFFICE_HOME'});
            # GetShortPathName returns undef when the path does not exist;
            # fall back to the long form instead of passing an empty
            # office.home to java.
            if (!defined $office_short_path || $office_short_path eq "") {
                $office_short_path = $ENV{'SOFFICE_HOME'};
            }
        }
        else {
            $office_short_path = $ENV{'SOFFICE_HOME'};
        }
    }
    elsif ($on_windows) { # SOFFICE_HOME not user-defined, try the windows default
        $office_short_path = "C:\\PROGRA~1\\OPENOF~1.ORG";
    }
    elsif (-d "/usr/lib/libreoffice") { # try libreoffice first
        $office_short_path = "/usr/lib/libreoffice";
    }
    elsif (-d "/usr/lib/openoffice") { # openoffice fallback
        $office_short_path = "/usr/lib/openoffice";
    }
    elsif (-d "/usr/lib64/openoffice.org3") { # 64 bit linux such as CentOS
        $office_short_path = "/usr/lib64/openoffice.org3";
    }
    else { # default linux officeHome which jodconverter tries for
        $office_short_path = "/opt/openoffice.org3";
    }

    my $convert_cmd = "java -Doffice.home=\"".$office_short_path."\" -jar \"$jodjar\" --port $self->{'openoffice_port'}";
    $convert_cmd .= " \"$source_file_full_path\" \"$target_file_path\"";

    if ($verbosity>2) {
        print $outhandle "Convert command: $convert_cmd\n";
    }

    my $print_info = { 'message_prefix' => "OpenOffice Conversion",
                       'message' => "Converting $source_file_no_path to: $target_file_type" };
    $print_info->{'cache_mode'} = $cache_mode if ($cache_mode ne "");

    # The first returned element (the "regenerated" flag) is not needed here.
    my (undef, $result, $had_error)
        = $self->autorun_general_cmd($convert_cmd,$source_file_full_path, $target_file_path,$print_info);

    my $status = $had_error ? 0 : 1;
    return ($status, $result, $target_file_path);
}
293
294
# Thin wrapper around convert() that fixes the cache mode to
# "without_result". Takes the same first four positional arguments as
# convert() and returns whatever convert() returns.
sub convert_without_result {
    my ($self, $source_file_path, $target_file_type, $convert_options, $convert_id) = @_;

    # Missing or false values become the empty string before delegation.
    $convert_options = "" unless $convert_options;
    $convert_id      = "" unless $convert_id;

    return $self->convert($source_file_path, $target_file_type,
                          $convert_options, $convert_id, "without_result");
}
306
307
# Unfinished stub: reads a few settings from the object and splits
# $input_filename into (name, directory, suffix) but does nothing further
# with them. The list assignment is deliberately left as the last
# statement, so it remains the sub's implicit return value.
sub tmp_area_convert_fileXX {
    my ($self, $output_ext, $input_filename, $textref) = @_;

    my ($outhandle, $convert_to, $failhandle, $convert_to_ext)
        = @{$self}{'outhandle', 'convert_to', 'failhandle', 'convert_to_ext'};

    # derive tmp filename from input filename
    my ($tailname, $dirname, $suffix)
        = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
}
321
322
# Delete every temporary output file recorded in
# $self->{'ootmp_file_paths'} that still exists on disk, then reset the
# record to an empty array. Safe to call when nothing has been recorded.
# Returns nothing.
sub clean_up_temporary_files {
    my $self = shift(@_);

    # Tolerate an unset list (e.g. if this runs before any file was recorded).
    my $recorded_paths = $self->{'ootmp_file_paths'} || [];

    foreach my $ootmp_file_path (@$recorded_paths) {
        if (-e $ootmp_file_path) {
            &FileUtils::removeFiles($ootmp_file_path);
        }
    }

    # An empty array *reference*: assigning "()" to a hash slot is a scalar
    # assignment and would store undef instead of an empty list.
    $self->{'ootmp_file_paths'} = [];

    return;
}
334
335
336
3371;
Note: See TracBrowser for help on using the repository browser.