source: gs2-extensions/open-office/trunk/src/perllib/plugins/OpenOfficeConverter.pm@ 25209

Last change on this file since 25209 was 25209, checked in by ak19, 12 years ago

Using the latest version of jodconverter, jodconverter-2.2.2, since there were conversion issues (like converting Diego's pptx file to html) when tested on the 32bit Win 7 machine.

  • Property svn:executable set to *
File size: 11.9 KB
Line 
1###########################################################################
2#
3# OpenOfficeConverter - helper plugin that does office document conversion
4# using jodconverter combined with OpenOffice
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2010 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package OpenOfficeConverter;
28
29use ConvertBinaryFile;
30use BaseMediaConverter;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35use gsprintf 'gsprintf';
36
37# these two variables mustn't be initialised here or they will get stuck
38# at those values.
39our $openoffice_conversion_available;
40our $no_openoffice_conversion_reason;
41
# Compile-time setup: declares BaseMediaConverter as the parent class and
# probes the environment to decide whether OpenOffice conversion can be used.
# The verdict is left in $openoffice_conversion_available, and the key of the
# explanatory message (if any) in $no_openoffice_conversion_reason.
BEGIN {
    @OpenOfficeConverter::ISA = ('BaseMediaConverter');

    # Start out optimistic; every failed check below clears the flag and
    # records why.
    $openoffice_conversion_available = 1;
    $no_openoffice_conversion_reason = "";

    # The jodconverter jar is looked for inside the extension's home
    # directory, so it can only be located when GEXT_OPENOFFICE is set.
    my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
    my $jodjar;
    if (defined $gextoo_home) {
        $jodjar = &util::filename_cat($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");
    }

    if (!defined $gextoo_home) {
        $openoffice_conversion_available = 0;
        $no_openoffice_conversion_reason = "gextopenofficenotinstalled";
    }
    elsif (!-e $jodjar) {
        $openoffice_conversion_available = 0;
        $no_openoffice_conversion_reason = "gextjodconverternotinstalled";
    }
    else {
        # Default probe/launch command, used as-is (plus backgrounding) on
        # Linux and Mac. On Windows it is replaced further down, but only
        # when soffice was found; otherwise it keeps this value and is
        # never executed.
        my $launch_cmd  = "soffice -headless 2>&1";
        my $exit_status = 0;

        # Port that jodconverter uses to talk to soffice. Defaults to 8100,
        # the same default given to SOFFICE_PORT below; users can override
        # it through the environment.
        $ENV{'JODCONVERTER_PORT'} = "8100" unless defined $ENV{'JODCONVERTER_PORT'};

        if ($ENV{'GSDLOS'} =~ m/^windows$/) {
            # Fill in defaults for anything the user has not configured.
            $ENV{'SOFFICE_HOST'} = "localhost" unless defined $ENV{'SOFFICE_HOST'};
            $ENV{'SOFFICE_PORT'} = "8100"      unless defined $ENV{'SOFFICE_PORT'};

            # Important to have this set when the Greenstone server and
            # open office are on a remote windows machine.
            if (!defined $ENV{'SOFFICE_HOME'}) {
                $ENV{'SOFFICE_HOME'} = &util::filename_cat($ENV{'ProgramFiles'},"OpenOffice.org 3");
            }

            # Make soffice reachable through PATH if its program folder is
            # where we guess it to be.
            my $program_dir = &util::filename_cat($ENV{'SOFFICE_HOME'},"program");
            &util::envvar_append("PATH",$program_dir) if (-d $program_dir);

            # Check silently that soffice exists first, otherwise windows
            # will display a popup (which.exe should be in bin/windows).
            $exit_status = system("which soffice >nul 2>&1");
            if ($exit_status == 0) {
                # Start OpenOffice listening on a uno socket, so that it can
                # be launched on a remote system if not already running.
                $launch_cmd = "start \"soffice process\" soffice"
                    . " \"-accept=socket,host=$ENV{'SOFFICE_HOST'},port=$ENV{'SOFFICE_PORT'};urp;StarOffice.ServiceManager\""
                    . " -headless 2>&1 >nul";
            }
        }
        else {
            # On Unix the process has to be put in the background
            # explicitly.
            # NOTE(review): with the trailing '&' the exit status presumably
            # reflects only the shell launch, not whether soffice exists --
            # confirm.
            $launch_cmd .= " >/dev/null &";
        }

        # Only launch when the preceding existence check (Windows) did not
        # already fail.
        if ($exit_status == 0) {
            $exit_status = system($launch_cmd);
        }

        if ($exit_status != 0) {
            $openoffice_conversion_available = 0;
            $no_openoffice_conversion_reason = "openofficenotinstalled";
        }
    }
}
129
# Arguments specific to this plugin: the port on which OpenOffice is asked
# to listen (see init).
my $arguments = [
    {
        'name'  => 'openoffice_port',
        'desc'  => '{OpenOfficeConverter.openoffice_port}',
        'type'  => 'int',
        'deft'  => '8100',
        'range' => '81,',
        'reqd'  => 'no',
    },
];

# Plugin description record; it is pushed onto the "OptList" by new().
my $options = {
    'name'     => 'OpenOfficeConverter',
    'desc'     => '{OpenOfficeConverter.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
145
# Constructor.
#
# Arguments: the class name, followed by the plugin list, the input
# arguments, the hash of argument/option lists and an auxiliary value. The
# last four are handed on unchanged to the BaseMediaConverter constructor
# after this plugin has registered its own class name, arguments and options
# in them.
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
# Unless the object is flagged 'info_only', it also carries the outcome of
# the compile-time availability check in 'openoffice_conversion_available'
# (and, on failure, the reason key in 'no_openoffice_conversion_reason').
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists, $auxilary) = @_;

    # Register this plugin before delegating to the parent constructor.
    push(@$pluginlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    my $self = BaseMediaConverter->new($pluginlist, $inputargs,
                                       $hashArgOptLists, $auxilary);

    # When only plugin information is wanted, options and availability are
    # of no interest.
    if (!$self->{'info_only'}) {
        if (!$openoffice_conversion_available) {
            $self->{'no_openoffice_conversion_reason'} = $no_openoffice_conversion_reason;

            # Tell the user that (and why) no conversion will take place.
            my $outhandle = $self->{'outhandle'};
            &gsprintf($outhandle, "OpenOfficeConverter: {OpenOfficeConverter.noconversionavailable} ({OpenOfficeConverter.$no_openoffice_conversion_reason})\n");
        }

        $self->{'openoffice_conversion_available'} = $openoffice_conversion_available;
    }

    return bless $self, $class;
}
173
# Prepares the converter for a run.
#
# Arguments: ($verbosity, $outhandle, $failhandle) are accepted for
# interface compatibility but are not used here.
#
# When OpenOffice conversion is available, the soffice launch command for
# the configured port is stored in $self->{'openoffice_launch_cmd'}. The
# list of temporary files produced by convert() is reset to an empty array
# reference.
#
# Returns nothing.
sub init {
    my $self = shift(@_);

    if ($openoffice_conversion_available) {
        # presumably filled in from the openoffice_port plugin argument by
        # the parent class -- TODO confirm
        my $oo_port = $self->{'openoffice_port'};

        my $launch_cmd = "soffice";
        $launch_cmd .= " \"-accept=socket,host=localhost,port=$oo_port;urp;StarOffice.ServiceManager\"";
        $launch_cmd .= " -headless";
        $self->{'openoffice_launch_cmd'} = $launch_cmd;
    }

    # Must be an array reference: assigning the empty list () to a hash
    # element stores undef, which only worked through autovivification.
    $self->{'ootmp_file_paths'} = [];

    return;
}
189
# Counterpart of init: discards the temporary files recorded during
# conversion by delegating to clean_up_temporary_files, whose result is
# passed back to the caller.
sub deinit {
    my ($self) = @_;

    return $self->clean_up_temporary_files();
}
195
196
# Works out the office installation directory that is handed to
# jodconverter through -Doffice.home.
#
# A defined SOFFICE_HOME wins (it may point to LibreOffice or OpenOffice,
# since soffice launches both); on Windows it is converted to its short
# (8.3) form. Without SOFFICE_HOME a platform default is chosen.
sub _oo_office_home_path {
    my $on_windows   = ($ENV{'GSDLOS'} =~ m/^windows$/);
    my $soffice_home = $ENV{'SOFFICE_HOME'};

    if (defined $soffice_home) {
        return $soffice_home if (!$on_windows);

        # GetShortPathName yields undef when the path does not exist; fall
        # back to the long form rather than passing an empty office.home.
        my $short_path = Win32::GetShortPathName($soffice_home);
        return (defined $short_path && $short_path ne "") ? $short_path : $soffice_home;
    }

    # SOFFICE_HOME not user-defined, try defaults.
    return "C:\\PROGRA~1\\OPENOF~1.ORG" if ($on_windows);

    # Try libreoffice first, then openoffice.
    foreach my $candidate ("/usr/lib/libreoffice", "/usr/lib/openoffice") {
        return $candidate if (-d $candidate);
    }

    # Default linux officeHome which jodconverter tries for.
    return "/opt/openoffice.org3";
}

# Converts an office document to another file type by running jodconverter.
#
# Arguments (positional):
#   $source_file_full_path - full path of the document to convert
#   $target_file_type      - extension of the wanted output (e.g. "html")
#   $convert_options       - optional; accepted but currently not used
#   $convert_id            - optional; when non-empty it is appended to the
#                            cached file root
#   $cache_mode            - optional; forwarded to autorun_general_cmd in
#                            the print-info hash when non-empty
#
# Returns a three element list (success flag, result, target file path).
# The flag is 0 when conversion is unavailable, when the source is not a
# plain file (result and path are then undef), or when the command reported
# an error; otherwise it is 1.
sub convert {
    my $self = shift(@_);
    my ($source_file_full_path, $target_file_type,
        $convert_options, $convert_id, $cache_mode) = @_;
    $convert_id = "" unless ($convert_id);
    $cache_mode = "" unless ($cache_mode);

    return (0,undef,undef) unless $openoffice_conversion_available;
    # check the filename
    return (0,undef,undef)
        unless (defined $source_file_full_path && -f $source_file_full_path);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_full_path);

    # Determine the full name and path of the output file
    my $target_file_path;
    if ($self->{'enable_cache'}) {
        $self->init_cache_for_file($source_file_full_path);
        my $cache_dir = $self->{'cached_dir'};
        my $file_root = $self->{'cached_file_root'};
        $file_root .= "_$convert_id" if ($convert_id ne "");
        my $target_file = "$file_root.$target_file_type";
        $target_file_path = &util::filename_cat($cache_dir,$target_file);
    }
    else {
        # Uncached output goes to a temporary file, which is remembered so
        # that clean_up_temporary_files can delete it later.
        $target_file_path = &util::get_timestamped_tmp_filename_in_collection($source_file_full_path, $target_file_type);
        push(@{$self->{'ootmp_file_paths'}}, $target_file_path);
    }

    # Generate and run the convert command
    my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
    my $jodjar = &util::filename_cat($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

    # jodjar always assumes that soffice (officeHome) is in its own default
    # location rather than checking for any user-defined SOFFICE_HOME var.
    # Therefore, we run jodjar with the -Doffice.home flag explicitly set.
    my $office_short_path = _oo_office_home_path();
    my $convert_cmd = "java -Doffice.home=\"".$office_short_path."\" -jar \"$jodjar\" --port $ENV{'JODCONVERTER_PORT'}";
    $convert_cmd .= " \"$source_file_full_path\" \"$target_file_path\"";

    if ($verbosity>2) {
        print {$outhandle} "Convert command: $convert_cmd\n";
    }

    my $print_info = { 'message_prefix' => "OpenOffice Conversion",
                       'message' => "Converting $source_file_no_path to: $target_file_type" };
    $print_info->{'cache_mode'} = $cache_mode if ($cache_mode ne "");

    my ($regenerated,$result,$had_error)
        = $self->autorun_general_cmd($convert_cmd,$source_file_full_path, $target_file_path,$print_info);

    return (($had_error ? 0 : 1), $result, $target_file_path);
}
288
289
# Convenience wrapper around convert() that always requests the
# "without_result" cache mode.
#
# Arguments: source file path, target file type, and optionally the convert
# options and convert id (false values are replaced by the empty string).
# Returns whatever convert() returns.
sub convert_without_result {
    my ($self, $source_file_path, $target_file_type, $options, $id) = @_;

    $options = "" unless ($options);
    $id      = "" unless ($id);

    return $self->convert($source_file_path, $target_file_type,
                          $options, $id, "without_result");
}
301
302
# Unfinished stub: it only splits the input filename into its parts and
# does nothing further with them.
#
# Arguments: ($output_ext, $input_filename, $textref); only
# $input_filename is used.
#
# As the list assignment is the last statement, the sub yields
# (tailname, dirname, suffix) in list context and the number of parts (3)
# in scalar context.
sub tmp_area_convert_fileXX {
    my $self = shift (@_);
    my ($output_ext, $input_filename, $textref) = @_;

    # derive tmp filename from input filename; the suffix is the last
    # dot-separated component, including the dot
    my ($tailname, $dirname, $suffix)
        = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
}
316
317
# Deletes every temporary file recorded in $self->{'ootmp_file_paths'} that
# still exists, then resets the list to an empty array reference.
#
# Returns nothing.
sub clean_up_temporary_files {
    my $self = shift(@_);

    # Tolerate a list that was never initialised.
    my $recorded_paths = $self->{'ootmp_file_paths'} || [];

    foreach my $ootmp_file_path (@$recorded_paths) {
        if (-e $ootmp_file_path) {
            &util::rm($ootmp_file_path);
        }
    }

    # Must be an array reference: assigning the empty list () to a hash
    # element stores undef instead of an empty list.
    $self->{'ootmp_file_paths'} = [];

    return;
}
329
330
331
3321;
Note: See TracBrowser for help on using the repository browser.