root/gs2-extensions/open-office/trunk/src/perllib/plugins/OpenOfficeConverter.pm @ 32317

Revision 32317, 12.3 KB (checked in by ak19, 2 years ago)

Basic updates to the OpenOffice extension: 1. BasePlugin is now BaseImporter. 2. Warnings notified of a change from a single-minus prefix to a double-minus prefix for the accept and headless arguments. Two bugs are still unresolved: (a) GLI gets stuck when launching if the OpenOffice extension is included and the extension itself gets stuck. This has been hard to track down so far, but this extension's jodconverter jar also gets stuck when run from the command line on these occasions. (b) It is still unclear how to get jodconverter to convert (Word) documents that contain images to HTML without always embedding the images inline as base64. jodconverter doesn't take the same option names as the soffice commands it calls, and jodconverter's command-line usage display is either incomplete or the options are very limited.

  • Property svn:executable set to *
Line 
1###########################################################################
2#
3# OpenOfficeConverter - helper plugin that does office document conversion
4#                       using jodconverter combined with OpenOffice
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2010 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package OpenOfficeConverter;
28
29use ConvertBinaryFile;
30use BaseMediaConverter;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35use gsprintf 'gsprintf';
36
# Package-wide conversion state, assigned by the BEGIN block below.
# Do NOT give these an initialiser at the declaration: the BEGIN block runs
# at compile time, and an initialiser here would execute later (at run time)
# and overwrite whatever BEGIN worked out.
our $openoffice_conversion_available;
our $no_openoffice_conversion_reason;

# Compile-time set-up:
#   * make this package a subclass of BaseMediaConverter;
#   * check that the OpenOffice extension and its jodconverter jar exist;
#   * default SOFFICE_HOST / SOFFICE_PORT if unset and try to start a
#     headless soffice listening on that socket.
# On any failure $openoffice_conversion_available is cleared and
# $no_openoffice_conversion_reason receives the key of the reason.
BEGIN {
    @OpenOfficeConverter::ISA = ('BaseMediaConverter');

    # Assume conversion works until one of the checks below says otherwise.
    $openoffice_conversion_available = 1;
    $no_openoffice_conversion_reason = "";

    my $ext_home = $ENV{'GEXT_OPENOFFICE'};

    # The jar location is only meaningful when the extension home is known.
    my $jod_jar;
    if (defined $ext_home) {
        $jod_jar = &FileUtils::filenameConcatenate($ext_home, "jodconverter-2.2.2", "lib", "jodconverter-cli-2.2.2.jar");
    }

    if (!defined $ext_home) {
        $openoffice_conversion_available = 0;
        $no_openoffice_conversion_reason = "gextopenofficenotinstalled";
    }
    elsif (!-e $jod_jar) {
        $openoffice_conversion_available = 0;
        $no_openoffice_conversion_reason = "gextjodconverternotinstalled";
    }
    else {
        # No separate JODCONVERTER_PORT: jodconverter and soffice are meant to
        # talk over the same port, so SOFFICE_PORT serves both (8100 being the
        # port the original author notes jodconverter expects by default).
        $ENV{'SOFFICE_HOST'} = "localhost" unless defined $ENV{'SOFFICE_HOST'};
        $ENV{'SOFFICE_PORT'} = "8100"      unless defined $ENV{'SOFFICE_PORT'};

        # The quoted --accept argument is shared by both platform commands.
        my $accept_arg = "\"--accept=socket,host=$ENV{'SOFFICE_HOST'},port=$ENV{'SOFFICE_PORT'};urp;StarOffice.ServiceManager\"";

        my $probe_status = 0;    # stays 0 unless the Windows probe fails
        my $launch_cmd;

        if ($ENV{'GSDLOS'} =~ m/^windows$/) {
            # Important to have SOFFICE_HOME set when the Greenstone server
            # and OpenOffice are on a remote Windows machine.
            if (!defined $ENV{'SOFFICE_HOME'}) {
                $ENV{'SOFFICE_HOME'} = &FileUtils::filenameConcatenate($ENV{'ProgramFiles'}, "OpenOffice.org 3");
            }

            my $program_dir = &FileUtils::filenameConcatenate($ENV{'SOFFICE_HOME'}, "program");
            &util::envvar_append("PATH", $program_dir) if (-d $program_dir);

            # Silently check that soffice exists first, otherwise Windows
            # displays a popup (which.exe should be in bin/windows).
            $probe_status = system("which soffice >nul 2>&1");

            # "start" is used so that the prompt returns after OO has been
            # launched; per the original author, client-gli otherwise tends
            # to hang waiting for it.
            $launch_cmd = "start \"soffice process\" soffice $accept_arg --headless 2>&1 >nul";
        }
        else {
            # On Unix the process has to be put in the background explicitly.
            $launch_cmd = "soffice $accept_arg --headless 2>&1 >/dev/null &";
        }

        # Only attempt the launch when the probe succeeded; a failed probe
        # is reported the same way as a failed launch.
        my $status = ($probe_status == 0) ? system($launch_cmd) : $probe_status;
        if ($status != 0) {
            $openoffice_conversion_available = 0;
            $no_openoffice_conversion_reason = "openofficenotinstalled";
        }
    }
}
127
# Arguments this converter contributes to a plugin's argument list.
# The 'desc' values are {Key} placeholders rather than literal text.
my $arguments = [
    {
        name  => 'openoffice_port',
        desc  => '{OpenOfficeConverter.openoffice_port}',
        type  => 'int',
        deft  => '8100',
        range => '81,',
        reqd  => 'no',
    },
];

# Option record describing this (abstract) converter; pushed onto the
# "OptList" by new().
my $options = {
    name     => 'OpenOfficeConverter',
    desc     => '{OpenOfficeConverter.desc}',
    abstract => 'yes',
    inherits => 'yes',
    args     => $arguments,
};
143
# Constructor.
#
# Parameters:
#   $class           - class being constructed
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through to the BaseMediaConverter constructor
#   $hashArgOptLists - hash ref; this converter's arguments and options are
#                      appended to its "ArgList" and "OptList" entries
#   $auxilary        - passed through to the BaseMediaConverter constructor
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
# Unless the object is in 'info_only' mode, it also records whether
# OpenOffice conversion is available and, when it is not, prints the reason
# to the object's 'outhandle'.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists,$auxilary) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Direct method call rather than the indirect-object form
    # ("new BaseMediaConverter(...)"), which Perl parses heuristically and
    # which is ambiguous in a package that defines its own new().
    my $self = BaseMediaConverter->new($pluginlist, $inputargs,
                                       $hashArgOptLists, $auxilary);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    if (!$openoffice_conversion_available) {
        $self->{'no_openoffice_conversion_reason'} = $no_openoffice_conversion_reason;

        my $outhandle = $self->{'outhandle'};
        &gsprintf($outhandle, "OpenOfficeConverter: {OpenOfficeConverter.noconversionavailable} ({OpenOfficeConverter.$no_openoffice_conversion_reason})\n");
    }

    $self->{'openoffice_conversion_available'} = $openoffice_conversion_available;

    return bless $self, $class;
}
171
# Per-run initialisation.
#
# Callers pass ($verbosity, $outhandle, $failhandle) after $self; none of
# them is used here.
#
# When conversion is available, stores the soffice launch command built from
# the 'openoffice_port' option in $self->{'openoffice_launch_cmd'}.
# NOTE(review): nothing else in this file reads 'openoffice_launch_cmd', and
# it hard-codes host=localhost rather than SOFFICE_HOST - confirm whether it
# is still needed.
#
# Always resets $self->{'ootmp_file_paths'} to an empty array ref.
sub init {
    my $self = shift(@_);

    if ($openoffice_conversion_available) {
        my $oo_port = $self->{'openoffice_port'};

        my $launch_cmd = "soffice";
        $launch_cmd .= " \"--accept=socket,host=localhost,port=$oo_port;urp;StarOffice.ServiceManager\"";
        $launch_cmd .= " --headless";
        $self->{'openoffice_launch_cmd'} = $launch_cmd;
    }

    # Was "= ()", which assigns an empty list in scalar context and so stored
    # undef instead of an empty array.
    $self->{'ootmp_file_paths'} = [];

    # Keep the scalar return value undef, as it was before the fix above.
    return;
}
188
# Tear-down counterpart of init(): delegates to clean_up_temporary_files().
sub deinit {
    my ($self) = @_;

    $self->clean_up_temporary_files();
}
194
195
# Private helper: work out the office installation directory handed to
# jodconverter via -Doffice.home.
#
# With SOFFICE_HOME set (it may point at LibreOffice or OpenOffice, since
# soffice launches both) that value is used; on Windows it is converted to
# its short (8.3) form.  Without it, a Windows default or the first existing
# well-known Unix directory is used.
sub _oo_office_home {
    my $on_windows = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/) ? 1 : 0;
    my $soffice_home = $ENV{'SOFFICE_HOME'};

    if (defined $soffice_home) {
        return $soffice_home unless $on_windows;

        # GetShortPathName can come back empty/undef (e.g. the directory
        # does not exist); fall back to the configured value rather than
        # passing an empty office.home.
        my $short_path = Win32::GetShortPathName($soffice_home);
        return (defined $short_path && $short_path ne "") ? $short_path : $soffice_home;
    }

    # SOFFICE_HOME not user-defined: try defaults.
    return "C:\\PROGRA~1\\OPENOF~1.ORG" if $on_windows;

    foreach my $candidate ("/usr/lib/libreoffice",          # try libreoffice first
                           "/usr/lib/openoffice",           # fallback
                           "/usr/lib64/openoffice.org3") {  # 64 bit linux, e.g. CentOS
        return $candidate if (-d $candidate);
    }

    # default linux officeHome which jodconverter tries for
    return "/opt/openoffice.org3";
}

# Convert a document with jodconverter.
#
# Parameters (positional, after $self):
#   $source_file_full_path - file to convert
#   $target_file_type      - extension of the output file to produce
#   $convert_options       - accepted for interface compatibility; unused
#   $convert_id            - optional; appended to the cached file root
#   $cache_mode            - optional; forwarded to autorun_general_cmd
#
# Returns a three-element list:
#   (0, undef, undef)               conversion unavailable or source missing
#   (0, $result, $target_file_path) the conversion command reported an error
#   (1, $result, $target_file_path) success
sub convert {
    my $self = shift(@_);
    my $source_file_full_path = shift(@_);
    my $target_file_type      = shift(@_);
    my $convert_options       = shift(@_) || "";
    my $convert_id            = shift(@_) || "";
    my $cache_mode            = shift(@_) || "";

    return (0,undef,undef) unless $openoffice_conversion_available;
    # check the filename
    return (0,undef,undef) if ( !-f $source_file_full_path);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_full_path);

    # Determine the full name and path of the output file
    my $target_file_path;
    if ($self->{'enable_cache'}) {
        $self->init_cache_for_file($source_file_full_path);
        my $cache_dir = $self->{'cached_dir'};
        my $file_root = $self->{'cached_file_root'};
        $file_root .= "_$convert_id" if ($convert_id ne "");
        my $target_file = "$file_root.$target_file_type";
        $target_file_path = &FileUtils::filenameConcatenate($cache_dir,$target_file);
    }
    else {
        $target_file_path = &util::get_timestamped_tmp_filename_in_collection($source_file_full_path, $target_file_type);
        # remembered so that clean_up_temporary_files() can delete it later
        push(@{$self->{'ootmp_file_paths'}}, $target_file_path);
    }

    # Generate and run the convert command
    my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
    my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

    # Per the original author, jodconverter assumes its own default office
    # home rather than checking any user-defined SOFFICE_HOME, so
    # -Doffice.home is always passed explicitly.
    my $office_short_path = _oo_office_home();

    # NOTE(review): the port comes from the 'openoffice_port' option, while
    # the BEGIN block starts soffice on $ENV{'SOFFICE_PORT'} - confirm that
    # the two are kept in step.
    my $convert_cmd = "java -Doffice.home=\"".$office_short_path."\" -jar \"$jodjar\" --port $self->{'openoffice_port'}";
    $convert_cmd .= " \"$source_file_full_path\" \"$target_file_path\"";

    if ($verbosity>2) {
        print $outhandle "Convert command: $convert_cmd\n";
    }

    my $print_info = { 'message_prefix' => "OpenOffice Conversion",
                       'message' => "Converting $source_file_no_path to: $target_file_type" };
    $print_info->{'cache_mode'} = $cache_mode if ($cache_mode ne "");

    my ($regenerated,$result,$had_error)
        = $self->autorun_general_cmd($convert_cmd,$source_file_full_path, $target_file_path,$print_info);
    if ($had_error) {
        return (0, $result,$target_file_path);
    }
    return (1, $result,$target_file_path);
}
293
294
# Same as convert(), but with the cache mode fixed to "without_result".
#
# Parameters (after $self): source file path, target file type, and the
# optional convert options and convert id (false values become "").
# Returns whatever convert() returns.
sub convert_without_result {
    my ($self, $source_file_path, $target_file_type, $convert_options, $convert_id) = @_;

    $convert_options ||= "";
    $convert_id      ||= "";

    return $self->convert($source_file_path, $target_file_type,
                          $convert_options, $convert_id, "without_result");
}
306
307
# NOTE(review): this looks like an unfinished or disabled stub (note the
# "XX" suffix) - it reads a few settings and parses the input filename, but
# does nothing with the results.
#
# Parameters (after $self): $output_ext, $input_filename, $textref.
# The final statement is the fileparse() list assignment, so its value is
# what the sub implicitly returns.
sub tmp_area_convert_fileXX {
    my ($self, $output_ext, $input_filename, $textref) = @_;

    # Settings fetched from the object; not used further below.
    my ($outhandle, $convert_to, $failhandle, $convert_to_ext)
        = @{$self}{'outhandle', 'convert_to', 'failhandle', 'convert_to_ext'};

    # derive tmp filename from input filename
    my ($tailname, $dirname, $suffix)
        = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
}
321
322
# Delete every temporary output file recorded in $self->{'ootmp_file_paths'}
# (convert() adds to that list when caching is disabled), then reset the
# list to an empty array ref.  Paths that no longer exist are skipped.
sub clean_up_temporary_files {
    my $self = shift(@_);

    # Tolerate the list never having been set up.
    foreach my $ootmp_file_path (@{ $self->{'ootmp_file_paths'} || [] }) {
        if (-e $ootmp_file_path) {
            &FileUtils::removeFiles($ootmp_file_path);
        }
    }

    # Was "= ()", which assigns an empty list in scalar context and so stored
    # undef instead of an empty array.
    $self->{'ootmp_file_paths'} = [];

    # Keep the scalar return value undef, as it was before the fix above.
    return;
}
334
335
336
3371; 
Note: See TracBrowser for help on using the browser.