source: gs2-extensions/open-office-src/trunk/perllib/plugins/OpenOfficePlugin.pm@ 23220

Last change on this file since 23220 was 23220, checked in by kjdon, 13 years ago

added a few more file types that it can process, and made it hidden in GLI. Needs a bit more work before we advertise it.

File size: 8.2 KB
Line 
1###########################################################################
2#
3# OpenOfficePlugin.pm -- for processing office documents (word processor, presentation, spreadsheet, RTF) via OpenOffice conversion
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package OpenOfficePlugin;
27
28use ConvertBinaryFile;
29use OpenOfficeConverter;
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33no strict 'subs';
34
35use gsprintf 'gsprintf';
36
# Set up the inheritance chain at compile time. ConvertBinaryFile is listed
# first, so it is searched before OpenOfficeConverter during method lookup.
BEGIN {
    @OpenOfficePlugin::ISA = qw(ConvertBinaryFile OpenOfficeConverter);
}
40
# Regex alternations of the file extensions (without the leading dot) that
# this plugin handles, grouped by the kind of document they are reported as.
my $word_pe = 'doc|dot|docx|odt|wpd';    # reported as "Word"
my $rtf_pe  = 'rtf';                     # reported as "RTF"
my $ppt_pe  = 'ppt|pptx|odp';            # reported as "PPT"
my $xls_pe  = 'xls|xlsx|ods';            # reported as "Excel"
45
# Argument definitions contributed by this plugin. Only process_exp is
# declared here; its default comes from get_default_process_exp().
my $arguments = [
    {
        name => "process_exp",
        desc => "{BasePlugin.process_exp}",
        type => "regexp",
        deft => get_default_process_exp(),
        reqd => "no",
    },
];
53
# Plugin description record. The plugin is not yet fully functional (for
# example, it does not do proper PowerPoint processing), so it is marked as
# hidden in GLI for now via 'hiddengli'.
my $options = {
    name      => "OpenOfficePlugin",
    desc      => "{OpenOfficePlugin.desc}",
    abstract  => "no",
    inherits  => "yes",
    hiddengli => "yes",
    args      => $arguments,
};
61
62
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to both parent constructors
#   $hashArgOptLists - hash ref; this plugin's argument and option
#                      definitions are appended to its "ArgList" and
#                      "OptList" entries
#
# Returns the plugin object blessed into $class. If the merged object has
# 'info_only' set, it is returned immediately without any conversion set-up.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Call the parent constructors with explicit Class->new(...) syntax.
    # The indirect form ("new Class(...)") is resolved by a parser heuristic
    # and is fragile inside a package that defines its own new().
    my $ooc_self = OpenOfficeConverter->new($pluginlist, $inputargs, $hashArgOptLists);
    my $cbf_self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    my $self = BasePlugin::merge_inheritance($ooc_self, $cbf_self);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # Record whether OpenOfficeConverter reported that conversion is
    # available; read_into_doc_obj and process branch on this flag.
    $self->{'openoffice_ext_working'} =
        $OpenOfficeConverter::openoffice_conversion_available ? 1 : 0;
    $self->{'convert_to'} = "structuredhtml";

    $self = bless $self, $class;

    # set convert_to_plugin and convert_to_ext
    $self->set_standard_convert_settings();

    # Make sure the secondary plugin has an (initially empty) option list
    # before the secondary plugins are loaded.
    my $secondary_plugin_name    = $self->{'convert_to_plugin'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
        $secondary_plugin_options->{$secondary_plugin_name} = [];
    }

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);

    return $self;
}
107
# Initialise the plugin. The arguments (verbosity, output handle, failure
# handle) are forwarded to the inherited init; OpenOfficeConverter's init is
# then called with no extra arguments, and its result is returned.
sub init {
    my ($self, @init_args) = @_;

    $self->SUPER::init(@init_args);
    return $self->OpenOfficeConverter::init();
}
115
# Called at the start of processing. The arguments (plugin info, base
# directory, processor, maximum number of documents) are forwarded first to
# the inherited begin and then to OpenOfficeConverter's begin, whose result
# is returned.
sub begin {
    my ($self, @begin_args) = @_;

    $self->SUPER::begin(@begin_args);
    return $self->OpenOfficeConverter::begin(@begin_args);
}
123
124
# Returns the default process_exp: a case-insensitive regular expression
# (as a string) matching file names that end in a dot followed by one of
# the supported word-processor, presentation, RTF or spreadsheet extensions.
#
# This is also called as a plain function (with no object) while the
# plugin's argument list is being built, so $self may be undefined.
sub get_default_process_exp {
    my $self = shift (@_);

    # The backslash has to be doubled: inside a double-quoted string "\."
    # collapses to ".", which in the resulting regex matches ANY character
    # rather than a literal dot before the extension.
    return "(?i)\\.($word_pe|$ppt_pe|$rtf_pe|$xls_pe)\$";
}
130
131
# Builds the document object for a file, forwarding all arguments
# ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor,
# $maxdocs, $total_count, $gli) unchanged.
#
# When OpenOffice conversion is working, the ConvertBinaryFile version is
# used (it will call tmp_area_convert_file and use the secondary plugins).
# Otherwise the BasePlugin version sets up the doc obj and process() does a
# little more afterwards.
sub read_into_doc_obj {
    my $self = shift (@_);

    return $self->{'openoffice_ext_working'}
        ? $self->ConvertBinaryFile::read_into_doc_obj(@_)
        : $self->BasePlugin::read_into_doc_obj(@_);
}
145
# Override of the conversion hook so that OpenOfficeConverter's convert
# method does the work.
#
# Arguments: ($output_ext, $input_filename, $textref); $textref is not used.
# Returns the converted file's name when convert reports a non-zero result;
# otherwise prints the error to the plugin's outhandle and returns "".
sub tmp_area_convert_file {
    my ($self, $output_ext, $input_filename) = @_;

    my ($status, $message, $converted_file) =
        $self->OpenOfficeConverter::convert($input_filename, $output_ext);

    # A non-zero status means the conversion succeeded.
    return $converted_file unless $status == 0;

    my $out = $self->{'outhandle'};
    print $out "Open Office Conversion error\n";
    print $out $message;
    return "";
}
160
# Classifies a file by its extension.
#
# Argument: $file - the file name (or path) to classify.
# Returns a two-element list:
#   - first value is used in _iconxx_ to give a srcicon,
#   - second value is used for FileFormat metadata.
# ("unknown", "Unknown") is returned when the extension is not recognised
# or no file name is given.
sub get_file_type_from_extension {
    my $self = shift (@_);
    my ($file) = @_;

    return ("unknown", "Unknown") unless defined $file;

    # Check against the various bits of process_exp. Each pattern is
    # anchored to a literal dot and the end of the name, and matched
    # case-insensitively, so that this agrees with the (?i) process_exp.
    # An unanchored match would misfire on names such as
    # "documents/report.xls" (contains "doc") or miss "REPORT.DOC".
    return ("doc", "Word")  if $file =~ /\.(?:$word_pe)$/i;
    return ("ppt", "PPT")   if $file =~ /\.(?:$ppt_pe)$/i;
    return ("xls", "Excel") if $file =~ /\.(?:$xls_pe)$/i;
    return ("rtf", "RTF")   if $file =~ /\.(?:$rtf_pe)$/i;

    return ("unknown", "Unknown");
}
183
184
# Plugin-specific processing of an already created document object.
#
# Arguments: ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli).
# Associates the original file with the document's top section and sets the
# FileFormat, srclink_file and srcicon metadata. If OpenOffice conversion is
# not available, a warning is emitted and dummy text is added instead.
#
# Returns 1, matching process_old in this file.
# NOTE(review): callers presumably treat a defined/true value as success;
# previously the return value was whatever the last expression produced.
sub process {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $cursection = $doc_obj->get_top_section();

    # store original file as associated file
    my $filename = &util::filename_cat($base_dir, $file);
    my $assocfilename = $doc_obj->get_assocfile_from_sourcefile();
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    my ($ext, $format) = $self->get_file_type_from_extension($file);
    # overwrite the one set by secondary plugin
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $format);

    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", "[SourceFile]");
    $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$ext."_");

    # if oo conversion not available, we have no text, so add some
    if (!$self->{'openoffice_ext_working'}) {
        if ($gli) {
            &gsprintf(STDERR, "<Warning p='openOfficePlugin' r='{OpenOfficeConverter.noconversionavailable}: {OpenOfficeConverter.".$self->{'no_openoffice_conversion_reason'}."}'>");
        }
        print STDERR "OpenOfficePlugin: no conversion available, just adding $file as is\n";
        # we have no text - adds dummy text and NoText metadata
        $self->add_dummy_text($doc_obj, $cursection);
    }

    return 1;
}
213
# Older version of process(), kept for reference.
#
# Arguments: ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli).
# Associates the original file with the document's top section and sets the
# FileFormat, srcicon and srclink_file metadata from values stored on $self
# ('filename_extension', 'file_type', 'keep_original_filename').
# An earlier variant that also emitted "srclink" / "/srclink" anchor
# metadata is no longer active.
# Always returns 1.
sub process_old {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    # need to check that not empty
    my $doc_ext = $self->{'filename_extension'};
    my $file_type = defined $self->{'file_type'} ? $self->{'file_type'} : "unknown";

    # associate original file with doc object
    my $cursection = $doc_obj->get_top_section();
    my $filename = &util::filename_cat($base_dir, $file);

    my $keep_original = ($self->{'keep_original_filename'} == 1);

    # When the original name is kept, this should be the same filename that
    # was used for the Source and SourceFile metadata, as [SourceFile] is
    # used in the srclink.
    my $assocfilename = $keep_original
        ? $doc_obj->get_assocfile_from_sourcefile()
        : "doc.$doc_ext";
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    # We use set instead of add here because we only want one value
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $file_type);

    my $srclink_filename = $keep_original ? "[SourceFile]" : "doc.$doc_ext";

    $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$doc_ext."_");
    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", $srclink_filename);

    return 1;
}
Note: See TracBrowser for help on using the repository browser.