source: main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm@ 22861

Last change on this file since 22861 was 22861, checked in by kjdon, 14 years ago

Now uses the new AutoLoadConverters instead of AutoloadConverterScripting. AutoLoadConverters doesn't inherit from ConvertBinaryFile, so these plugins all inherit from ConvertBinaryFile directly again. This lets us initialise the converters and fix up the modifications to the arguments before they are parsed when we do new ConvertBinaryFile. PowerPointPlugin is incomplete and still needs a lot of work on processing the result of an OpenOffice conversion.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.3 KB
Line 
1###########################################################################
2#
3# PowerPointPlugin.pm -- plugin for importing Microsoft PowerPoint files.
4# (basic version supports versions 95 and 97)
5# (through OpenOffice extension, supports all contemporary formats)
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2002 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29package PowerPointPlugin;
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33no strict 'subs';
34
35use gsprintf 'gsprintf';
36
37use AutoLoadConverters;
38use ConvertBinaryFile;
39
# Set up inheritance at compile time. ConvertBinaryFile is listed first, so
# it is searched before AutoLoadConverters during method resolution.
BEGIN {
    @PowerPointPlugin::ISA = qw(ConvertBinaryFile AutoLoadConverters);
}
43
# File-scoped flag, initially off. new() switches it to 1 when the
# AutoLoadConverters object reports 'openoffice_available', and
# get_default_process_exp() reads it to choose the default file-extension
# pattern.
my $openoffice_available = 0;
45
# Values offered for the -convert_to option when running on Windows
# (used as the 'list' of the convert_to entry in $opt_windows_args).
# Each 'desc' is a resource-bundle key, not literal display text.
my $windows_convert_to_list = [
    {
        name => 'auto',
        desc => '{ConvertBinaryFile.convert_to.auto}',
    },
    {
        name => 'html',
        desc => '{ConvertBinaryFile.convert_to.html}',
    },
    {
        name => 'text',
        desc => '{ConvertBinaryFile.convert_to.text}',
    },
    {
        name => 'pagedimg_jpg',
        desc => '{ConvertBinaryFile.convert_to.pagedimg_jpg}',
    },
    {
        name => 'pagedimg_gif',
        desc => '{ConvertBinaryFile.convert_to.pagedimg_gif}',
    },
    {
        name => 'pagedimg_png',
        desc => '{ConvertBinaryFile.convert_to.pagedimg_png}',
    },
];
60
# Values offered for the -convert_to option when the OpenOffice converter is
# available (used as the 'list' of the convert_to entry in $opt_office_args).
# Note that html and pagedimg use PowerPointPlugin-specific description keys.
my $openoffice_convert_to_list = [
    {
        name => 'auto',
        desc => '{ConvertBinaryFile.convert_to.auto}',
    },
    {
        name => 'html',
        desc => '{PowerPointPlugin.convert_to.oo_html}',
    },
    {
        name => 'text',
        desc => '{ConvertBinaryFile.convert_to.text}',
    },
    {
        name => 'pagedimg',
        desc => '{PowerPointPlugin.convert_to.pagedimg}',
    },
];
71
# Base argument specifications for this plugin. new() appends the
# platform/converter specific specs to this list before it is handed on for
# argument parsing.
my $arguments = [
    {
        name => 'process_exp',
        desc => '{BasePlugin.process_exp}',
        type => 'regexp',
        reqd => 'no',
        # Placeholder only: new() replaces it with the real pattern once it
        # knows whether OpenOffice is available.
        deft => '&get_default_process_exp()',
    },
];
80
# Extra argument specifications that new() adds only when $ENV{GSDLOS}
# identifies the platform as Windows.
my $opt_windows_args = [
    {
        name => 'convert_to',
        desc => '{ConvertBinaryFile.convert_to}',
        type => 'enum',
        reqd => 'yes',
        list => $windows_convert_to_list,
        deft => 'html',
    },
    {
        name => 'windows_scripting',
        desc => '{PowerPointPlugin.windows_scripting}',
        type => 'flag',
        reqd => 'no',
    },
];
93
# Extra argument specification that new() adds only when the OpenOffice
# converter is reported as available.
my $opt_office_args = [
    {
        name => 'convert_to',
        desc => '{ConvertBinaryFile.convert_to}',
        type => 'enum',
        reqd => 'yes',
        list => $openoffice_convert_to_list,
        deft => 'html',
    },
];
102
# Plugin description record; new() pushes it onto the shared "OptList".
# 'args' points at the same array as $arguments, so specs appended to
# $arguments later are visible through this record as well.
my $options = {
    name           => 'PowerPointPlugin',
    desc           => '{PowerPointPlugin.desc}',
    abstract       => 'no',
    inherits       => 'yes',
    # Source docs in PPT format can be replaced with GS-generated html
    srcreplaceable => 'yes',
    args           => $arguments,
};
109
# Constructor.
#
# Parameters:
#   $class           - class name being constructed
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - plugin arguments, passed through to the parent
#                      constructors
#   $hashArgOptLists - hash ref holding the shared "ArgList" and "OptList"
#                      arrays that this plugin's specs are appended to
#
# Returns the blessed plugin object. When 'info_only' is set on the merged
# object, it is returned straight after blessing, with no further set-up.
#
# Fixes relative to the earlier version:
#   * The optional argument specs are added to the file-scoped $arguments
#     list at most once, so constructing the plugin repeatedly no longer
#     duplicates them.
#   * The default for process_exp is obtained by calling
#     get_default_process_exp() directly rather than string-eval'ing the
#     current 'deft' value. On a second construction that value was already
#     a regex (not Perl code), so the eval failed and left 'deft' undefined.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # The ArgList push has to wait until the arguments array is complete
    # (see below); the OptList entry goes in first so that the print info
    # comes out in the right order.
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # $arguments is shared by every instance (and referenced from $options),
    # so only append spec hashes that are not already in it. The comparison
    # is by reference identity, which means distinct specs sharing a name
    # (e.g. the two convert_to variants) are still both added.
    my $add_specs_once = sub {
        my ($extra_specs) = @_;
        foreach my $spec (@$extra_specs) {
            next if grep { $_ == $spec } @$arguments;
            push(@$arguments, $spec);
        }
    };

    if (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/i) {
        $add_specs_once->($opt_windows_args);
    }

    my $auto_converter_self = AutoLoadConverters->new($pluginlist, $inputargs, $hashArgOptLists, ["OpenOfficeConverter"], 1);

    if ($auto_converter_self->{'openoffice_available'}) {
        $add_specs_once->($opt_office_args);
        $openoffice_available = 1;
    }
    # TODO need to do the case where they are both enabled!!! what will the convert to list be???

    # Work out the default for process_exp. This is delayed until here so we
    # know whether openoffice is available, but it must happen before the
    # args are parsed.
    foreach my $spec (@$arguments) {
        if ($spec->{'name'} eq "process_exp") {
            $spec->{'deft'} = get_default_process_exp();
            last;
        }
    }

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});

    my $cbf_self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);
    my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self);

    $self = bless $self, $class;
    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return $self;
    }

    $self->{'filename_extension'} = "ppt";
    $self->{'file_type'} = "PPT";

    # Resolve "auto" to a concrete output format.
    if ($self->{'convert_to'} eq "auto") {
        if ($self->{'windows_scripting'}) {
            $self->{'convert_to'} = "pagedimg_jpg";
        }
        else {
            $self->{'convert_to'} = "html";
        }
    }

    my $outhandle = $self->{'outhandle'};

    # can't have windows_scripting and openoffice_conversion at the same time
    if ($self->{'windows_scripting'} && $self->{'openoffice_conversion'}) {
        print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_conversion\n";
        print $outhandle "         on at the same time. Defaulting to -windows_scripting\n";
        $self->{'openoffice_conversion'} = 0;
    }

    # these are passed through to gsConvert.pl by ConvertBinaryFile.pm
    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};

    # set convert_to_plugin and convert_to_ext
    $self->set_standard_convert_settings();

    my $secondary_plugin_name = $self->{'convert_to_plugin'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
        $secondary_plugin_options->{$secondary_plugin_name} = [];
    }
    my $specific_options = $secondary_plugin_options->{$secondary_plugin_name};

    push(@$specific_options, "-file_rename_method", "none");
    push(@$specific_options, "-extract_language") if $self->{'extract_language'};

    if ($secondary_plugin_name eq "HTMLPlugin") {
        push(@$specific_options, "-processing_tmp_files");
        push(@$specific_options, "-metadata_fields", "Title,GENERATOR,date,author<Creator>");
    }
    elsif ($secondary_plugin_name eq "PagedImagePlugin") {
        push(@$specific_options, "-processing_tmp_files");
        # is this true??
        push(@$specific_options, "-input_encoding", "utf8");
    }

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);
    return $self;
}
206
# Returns the default process_exp pattern (as a string) for this plugin.
# With OpenOffice available the pattern also accepts .pptx and .odp;
# otherwise only .ppt is matched. Any arguments passed in are ignored.
sub get_default_process_exp {
    return $openoffice_available
        ? q^(?i)\.(ppt|pptx|odp)$^
        : q^(?i)\.ppt$^;
}
216
# Initialise both parent classes: the inherited (SUPER) init first, with the
# caller's arguments, then the AutoLoadConverters init with no arguments.
sub init {
    my $self = shift;

    # ConvertBinaryFile init
    $self->SUPER::init(@_);

    $self->AutoLoadConverters::init();
}
225
# Run the begin hook of both parent classes: AutoLoadConverters first (no
# arguments), then the inherited (SUPER) begin with the caller's arguments.
sub begin {
    my $self = shift;

    $self->AutoLoadConverters::begin();

    $self->SUPER::begin(@_);
}
233
# Run the deinit hook of both parent classes: AutoLoadConverters first (no
# arguments), then the inherited (SUPER) deinit with the caller's arguments.
sub deinit {
    my $self = shift;

    $self->AutoLoadConverters::deinit();

    $self->SUPER::deinit(@_);
}
241
# Overrides the AutoLoadConverters version, because more work is needed once
# the file has been converted if we are converting to an item file.
#
# Parameters: ($output_ext, $input_filename, $textref).
# Returns the name of the converted file, or "" if the OpenOffice conversion
# reports failure. When 'openoffice_conversion' is off, the call is handed
# unchanged to ConvertBinaryFile::tmp_area_convert_file.
sub tmp_area_convert_file {
    my $self = shift;
    my ($output_ext, $input_filename, $textref) = @_;

    # No OpenOffice conversion requested: use the standard implementation.
    unless ($self->{'openoffice_conversion'}) {
        return $self->ConvertBinaryFile::tmp_area_convert_file(@_);
    }

    # Paged images are produced by first converting to html.
    $output_ext = "html" if $self->{'convert_to'} eq "pagedimg";

    my ($status, $status_msg, $converted_file) =
        $self->OpenOfficeConverter::convert($input_filename, $output_ext);

    if ($status == 0) {
        my $outhandle = $self->{'outhandle'};
        print $outhandle "OpenOfficeConverter Conversion error\n";
        print $outhandle $status_msg;
        return "";
    }

    if ($self->{'convert_to'} eq "pagedimg") {
        # NOTE(review): hard-coded developer path standing in for the item
        # file that should be generated from $converted_file; the intended
        # generate_item_file() call was commented out upstream.
        #my $item_filename = $self->generate_item_file($converted_file);
        #return $item_filename;
        return "/research/kjdon/home/gsdl/collect/openoffice/test.item";
    }

    return $converted_file;
}
273
# Override of the default read for some situations: converting ppt to html
# results in many files, and we want them all to be processed.
#
# Parameters: ($pluginfo, $base_dir, $file, $block_hash, $metadata,
#              $processor, $maxdocs, $total_count, $gli).
# Returns undef when this plugin cannot process the file; otherwise the
# status from read_into_doc_obj (1 means the file has been processed).
sub read_XX {
    my $self = shift;
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    # can we process this file??
    my ($full_path, $leaf_name) = &util::get_full_filenames($base_dir, $file);
    if (!$self->can_process_this_file($full_path)) {
        return undef;
    }

    my ($status, $document) = $self->read_into_doc_obj(@_);

    my $read_succeeded = defined($status) && ($status == 1);
    if ($read_succeeded) {
        # hand the document over for processing, then release it
        $processor->process($document);
        $self->{'num_processed'}++;
        undef $document;
    }

    # delete any temp files that we may have created
    $self->clean_up_after_doc_obj_processing();

    # a status of 1 means the file has been processed
    return $status;
}
302
3031;
304
Note: See TracBrowser for help on using the repository browser.