root/gs2-extensions/open-office-src/trunk/perllib/plugins/OpenOfficePlugin.pm @ 23220

Revision 23220, 8.2 KB (checked in by kjdon, 9 years ago)

added a few more file types that it can process, and made it hidden in GLI. Needs a bit more work before we advertise it.

Line 
1###########################################################################
2#
3# OpenOfficePlugin.pm -- for processing office documents (Word, RTF, PowerPoint, Excel and OpenDocument files) via OpenOffice
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package OpenOfficePlugin;
27
28use ConvertBinaryFile;
29use OpenOfficeConverter;
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33no strict 'subs';
34
35use gsprintf 'gsprintf';
36
# Set up multiple inheritance at compile time.  ConvertBinaryFile is listed
# first, so it is searched before OpenOfficeConverter during method lookup.
BEGIN {
    @OpenOfficePlugin::ISA = qw(ConvertBinaryFile OpenOfficeConverter);
}
40
# Extension alternations (bare regex fragments, no dot and no anchors),
# grouped by the kind of document they denote.  They are combined into the
# default process_exp and consulted again when a file is classified by its
# extension.
my $word_pe = join('|', qw(doc dot docx odt wpd));
my $rtf_pe  = join('|', qw(rtf));
my $ppt_pe  = join('|', qw(ppt pptx odp));
my $xls_pe  = join('|', qw(xls xlsx ods));

# Arguments specific to this plugin: only process_exp, whose default is
# built from the extension groups above.
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "{BasePlugin.process_exp}",
        'type' => "regexp",
        'deft' => &get_default_process_exp(),
        'reqd' => "no",
    },
];

# Plugin description record.
# The plugin is not fully functional yet (e.g. it does not do proper
# PowerPoint processing), so 'hiddengli' keeps it hidden in GLI for now.
my $options = {
    'name'      => "OpenOfficePlugin",
    'desc'      => "{OpenOfficePlugin.desc}",
    'abstract'  => "no",
    'inherits'  => "yes",
    'hiddengli' => "yes",
    'args'      => $arguments,
};
61
62
# Constructor.
#
# Arguments:
#   $class           - name of the class to bless the new object into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to both parent constructors
#   $hashArgOptLists - hash ref; this plugin's argument list and option
#                      record are appended to its "ArgList" / "OptList"
#
# Returns the blessed plugin object.  When the merged object has 'info_only'
# set, it is returned immediately without any conversion set-up.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Build both parent objects and merge them into a single hash.
    # Explicit Class->new(...) is used rather than indirect-object syntax
    # ("new Class(...)"), which is ambiguous to the parser inside a package
    # that defines its own sub new.
    my $ooc_self = OpenOfficeConverter->new($pluginlist, $inputargs, $hashArgOptLists);
    my $cbf_self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    my $self = BasePlugin::merge_inheritance($ooc_self, $cbf_self);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # Remember whether OpenOfficeConverter reported that conversion is
    # possible; read_into_doc_obj and process branch on this flag.
    $self->{'openoffice_ext_working'} =
        $OpenOfficeConverter::openoffice_conversion_available ? 1 : 0;

    $self->{'convert_to'} = "structuredhtml";

    $self = bless $self, $class;

    # set convert_to_plugin and convert_to_ext
    $self->set_standard_convert_settings();

    # Make sure the secondary plugin has an (initially empty) option list
    # before the secondary plugins are loaded.
    my $secondary_plugin_name    = $self->{'convert_to_plugin'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
        $secondary_plugin_options->{$secondary_plugin_name} = [];
    }

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);

    return $self;
}
107
# Initialise the plugin.
#
# The remaining arguments (verbosity, outhandle, failhandle) are forwarded
# untouched to the inherited init; OpenOfficeConverter's init is then run
# with no arguments, and its result is what this method returns.
sub init {
    my $self = shift;

    $self->SUPER::init(@_);
    return $self->OpenOfficeConverter::init();
}
115
# Start-of-run hook.
#
# The remaining arguments (pluginfo, base_dir, processor, maxdocs) are
# forwarded first to the inherited begin and then to OpenOfficeConverter's
# begin; the result of the latter is returned.
sub begin {
    my $self = shift;

    $self->SUPER::begin(@_);
    return $self->OpenOfficeConverter::begin(@_);
}
123
124
# Build the default process_exp: a case-insensitive regular expression that
# matches file names ending in a dot followed by one of the supported
# extensions.
#
# May be called as a method or as a plain function with no arguments (it is
# called that way when the default argument list is built); $self is unused.
#
# Returns the regular expression as a string.
sub get_default_process_exp {
    my $self = shift (@_);

    # Single-quoted pieces keep the backslash, so the dot stays a literal
    # dot.  Written as "\." inside a double-quoted string the backslash is
    # dropped and the resulting '.' would match any character.
    return '(?i)\.(' . join('|', $word_pe, $ppt_pe, $rtf_pe, $xls_pe) . ')$';
}
130
131
# Create the document object for a file.
#
# Remaining arguments, forwarded unchanged:
#   ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor,
#    $maxdocs, $total_count, $gli)
#
# When OpenOffice conversion is available, the ConvertBinaryFile version is
# used; it will call tmp_area_convert_file and use the secondary plugins.
# Otherwise the BasePlugin version sets up the doc obj and a little more is
# done later in process.
sub read_into_doc_obj {
    my $self = shift;

    return $self->{'openoffice_ext_working'}
        ? $self->ConvertBinaryFile::read_into_doc_obj(@_)
        : $self->BasePlugin::read_into_doc_obj(@_);
}
145
# Override of the conversion step so that our own convert method
# (OpenOfficeConverter::convert) is used.
#
# Arguments:
#   $output_ext     - extension of the format to convert to
#   $input_filename - file to convert
#   (a third argument, a text reference, is accepted but not used here)
#
# Returns the name of the converted file, or "" after writing the
# converter's message to the plugin's outhandle when conversion reports a
# zero result.
sub tmp_area_convert_file {
    my $self = shift;
    my ($output_ext, $input_filename) = @_;

    my ($status, $message, $converted_file) =
        $self->OpenOfficeConverter::convert($input_filename, $output_ext);

    if ($status == 0) {
        my $out = $self->{'outhandle'};
        print $out "Open Office Conversion error\n";
        print $out $message;
        return "";
    }

    return $converted_file;
}
160
# Classify a file by its extension.
#
# Argument:
#   $file - file name (or path) to classify
#
# Returns a two-element list:
#   first value  - used in _iconxx_ to give a srcicon
#   second value - used for FileFormat metadata
# ("unknown", "Unknown") is returned when no extension group matches.
#
# Each group is matched only against the trailing ".ext" of the name and
# case-insensitively, in line with the default process_exp.  An unanchored
# match would misclassify names that merely contain an extension string
# somewhere (for example "doctor_notes.xls" contains "doc").
sub get_file_type_from_extension {
    my $self = shift (@_);
    my ($file) = @_;

    # check against the various bits of process_exp
    if ($file =~ /\.(?:$word_pe)$/i) {
        return ("doc", "Word");
    }
    if ($file =~ /\.(?:$ppt_pe)$/i) {
        return ("ppt", "PPT");
    }
    if ($file =~ /\.(?:$xls_pe)$/i) {
        return ("xls", "Excel");
    }
    if ($file =~ /\.(?:$rtf_pe)$/i) {
        return ("rtf", "RTF");
    }

    return ("unknown", "Unknown");
}
183
184
# Plugin-specific processing of an already created document object.
#
# Arguments:
#   $pluginfo, $metadata - accepted but not used here
#   $base_dir, $file     - joined to locate the original source file
#   $doc_obj             - document object to update
#   $gli                 - when true, an extra <Warning> element is written
#                          to STDERR if no conversion is available
#
# The original file is attached as an associated file, FileFormat is set
# from the file's extension, and srclink_file / srcicon metadata are added.
# If OpenOffice conversion is not available there is no text, so dummy text
# is added instead.
#
# Returns 1, like process_old, instead of leaking the value of whichever
# statement happened to run last.
# NOTE(review): presumably callers treat a defined return value as success;
# confirm against the code that invokes process.
sub process {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $cursection = $doc_obj->get_top_section();

    # store original file as associated file
    my $filename = &util::filename_cat($base_dir, $file);
    my $assocfilename = $doc_obj->get_assocfile_from_sourcefile();
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    my ($ext, $format) = $self->get_file_type_from_extension($file);
    # overwrite the one set by secondary plugin
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $format);

    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", "[SourceFile]");
    $doc_obj->add_utf8_metadata ($cursection, "srcicon",  "_icon".$ext."_");

    # if oo conversion not available, we have no text, so add some
    if (!$self->{'openoffice_ext_working'}) {
        if ($gli) {
            &gsprintf(STDERR, "<Warning p='openOfficePlugin' r='{OpenOfficeConverter.noconversionavailable}: {OpenOfficeConverter.".$self->{'no_openoffice_conversion_reason'}."}'>");
        }
        print STDERR "OpenOfficePlugin: no conversion available, just adding $file as is\n";
        # we have no text - adds dummy text and NoText metadata
        $self->add_dummy_text($doc_obj, $doc_obj->get_top_section());
    }

    return 1;
}
213
# Older version of process, kept alongside the current one.
#
# Arguments (positional): $pluginfo, $base_dir, $file, $metadata, $doc_obj,
# $gli.  Only $base_dir, $file and $doc_obj are used.
#
# Attaches the original file to the document's top section, sets FileFormat
# from $self->{'file_type'} ("unknown" when that is not defined) and adds
# srcicon / srclink_file metadata.  When 'keep_original_filename' is 1 the
# associated file keeps the name used for the Source and SourceFile
# metadata, and srclink_file is "[SourceFile]"; otherwise both use
# "doc.<ext>".  The explicit srclink / /srclink anchor metadata that used
# to be emitted here is disabled.
#
# Returns 1.
sub process_old {
    my $self = shift;
    my (undef, $base_dir, $file, undef, $doc_obj) = @_;

    # need to check that not empty
    my $doc_ext   = $self->{'filename_extension'};
    my $file_type = defined $self->{'file_type'} ? $self->{'file_type'} : "unknown";

    my $keep_original = ($self->{'keep_original_filename'} == 1);
    my $default_name  = "doc.$doc_ext";

    # associate original file with doc object
    my $cursection = $doc_obj->get_top_section();
    my $filename   = &util::filename_cat($base_dir, $file);
    my $assocfilename = $keep_original
        ? $doc_obj->get_assocfile_from_sourcefile()
        : $default_name;
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    # We use set instead of add here because we only want one value
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $file_type);

    # [SourceFile] is resolved through metadata, matching the name chosen
    # for the associated file above.
    my $srclink_filename = $keep_original ? "[SourceFile]" : $default_name;

    $doc_obj->add_utf8_metadata ($cursection, "srcicon",  "_icon".$doc_ext."_");
    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", $srclink_filename);

    return 1;
}
Note: See TracBrowser for help on using the browser.