root/gs2-extensions/open-office-src/trunk/perllib/plugins/OpenOfficePlugin.pm @ 22610

Revision 22610, 8.0 KB (checked in by kjdon, 9 years ago)

New OpenOfficePlugin that can be used in a collection. Inherits from ConvertBinaryFile for all the secondary plugin stuff, and from OpenOfficeConverter for the conversion bit.

Line 
1###########################################################################
2#
3# OpenOfficePlugin.pm -- for processing office documents (Word, PowerPoint) via OpenOffice
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package OpenOfficePlugin;
27
28use ConvertBinaryFile;
29use OpenOfficeConverter;
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33no strict 'subs';
34
35use gsprintf 'gsprintf';
36
# Set up inheritance at compile time: ConvertBinaryFile is searched first for
# methods, then OpenOfficeConverter.
BEGIN {
    @OpenOfficePlugin::ISA = qw(ConvertBinaryFile OpenOfficeConverter);
}
40
# Extension alternations (regex fragments) for the document families this
# plugin recognises. They are interpolated into the default process_exp and
# into the file-type detection patterns.
my $word_pe = join("|", qw(doc dot docx odt));
my $ppt_pe  = "ppt";

# Plugin-specific arguments. The default for process_exp is computed once,
# when this file is loaded, by calling get_default_process_exp() as a plain
# function.
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "{BasePlugin.process_exp}",
        'type' => "regexp",
        'deft' => &get_default_process_exp(),
        'reqd' => "no",
    },
];

# Plugin description record; new() pushes it onto the caller's "OptList".
my $options = {
    'name'     => "OpenOfficePlugin",
    'desc'     => "{OpenOfficePlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
57
58
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to both parent constructors
#   $hashArgOptLists - hash ref; this plugin's arguments and options are
#                      appended to its "ArgList" and "OptList" entries
#
# Builds one object from each parent class (OpenOfficeConverter and
# ConvertBinaryFile), combines them with BasePlugin::merge_inheritance, and
# blesses the result into $class. When 'info_only' is set on the combined
# object it is returned straight away, with no conversion or secondary
# plugin setup.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Use explicit Class->new(...) calls: the indirect-object form
    # "new Class(...)" is ambiguous inside a package that defines its own new().
    my $ooc_self = OpenOfficeConverter->new($pluginlist, $inputargs, $hashArgOptLists);
    my $cbf_self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    my $self = BasePlugin::merge_inheritance($ooc_self, $cbf_self);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # Remember whether the OpenOffice conversion can actually be used;
    # read_into_doc_obj() and process() branch on this flag.
    $self->{'openoffice_ext_working'} =
        $OpenOfficeConverter::openoffice_conversion_available ? 1 : 0;
    $self->{'convert_to'} = "structuredhtml";

    $self = bless $self, $class;

    # set convert_to_plugin and convert_to_ext
    $self->set_standard_convert_settings();

    # Make sure the secondary plugin has an (initially empty) option list
    # before the secondary plugins are loaded.
    my $secondary_plugin_name = $self->{'convert_to_plugin'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
        $secondary_plugin_options->{$secondary_plugin_name} = [];
    }

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);

    return $self;
}
104
# Initialise the plugin.
#
# The arguments ($verbosity, $outhandle, $failhandle) are forwarded untouched
# to the inherited init() reached through SUPER. OpenOfficeConverter::init()
# is then called explicitly, with no arguments, because SUPER only reaches
# one of the two parent classes.
sub init {
    my $self = shift (@_);

    $self->SUPER::init(@_);
    $self->OpenOfficeConverter::init();

    # Switch the cover_image setting off for this plugin.
    $self->{'cover_image'} = 0;
}
113
# Start-of-import hook.
#
# The arguments ($pluginfo, $base_dir, $processor, $maxdocs) are handed on
# unchanged, first to the inherited begin() reached through SUPER and then to
# OpenOfficeConverter::begin(), so both parent classes get to run theirs.
sub begin {
    my $self = shift (@_);

    $self->SUPER::begin(@_);
    $self->OpenOfficeConverter::begin(@_);
}
121
122
# Returns the default process_exp as a regex string: a case-insensitive match
# on a literal dot followed by one of the Word or PowerPoint extensions at the
# end of the filename, i.e. (?i)\.(doc|dot|docx|odt|ppt)$
#
# Also called as a plain function (without an object) while the plugin's
# argument list is being built, so $self may be undefined and is not used.
sub get_default_process_exp {
    my $self = shift (@_);

    # The backslash has to be doubled here. Inside a double-quoted string a
    # single "\." collapses to ".", which would make the pattern accept ANY
    # character before the extension (e.g. a file named "xdoc").
    return "(?i)\\.($word_pe|$ppt_pe)\$";
}
129
130
# Build the document object for a file.
#
# Arguments after $self are passed on untouched:
#   ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor,
#    $maxdocs, $total_count, $gli)
#
# When OpenOffice conversion is usable, the ConvertBinaryFile implementation
# is used (it will call tmp_area_convert_file and use the secondary plugins).
# Otherwise the BasePlugin implementation sets up the doc obj, and process()
# does a little bit more afterwards.
sub read_into_doc_obj {
    my $self = shift (@_);

    return $self->ConvertBinaryFile::read_into_doc_obj(@_)
        if $self->{'openoffice_ext_working'};

    return $self->BasePlugin::read_into_doc_obj(@_);
}
144
# Override of the conversion hook so that our own OpenOffice converter is used.
#
# Arguments: ($output_ext, $input_filename, $textref); $textref is not used.
# Returns the converted file's name when the converter reports a non-zero
# result. Otherwise the converter's message is written to the plugin's
# 'outhandle' and the empty string is returned.
sub tmp_area_convert_file {
    my ($self, $output_ext, $input_filename, $textref) = @_;

    my ($status, $message, $converted_file) =
        $self->OpenOfficeConverter::convert($input_filename, $output_ext);

    return $converted_file if ($status != 0);

    # Conversion failed: report it and signal failure with an empty filename.
    my $log = $self->{'outhandle'};
    print {$log} "Open Office Conversion error\n";
    print {$log} $message;
    return "";
}
159
# Classify a file by its extension.
#
# Argument: $file - the file name (or path) to inspect.
# Returns a two-element list (icon extension, format name):
#   ("doc", "Word")          for doc/dot/docx/odt
#   ("ppt", "PowerPoint")    for ppt
#   ("unknown", "Unknown")   otherwise
#
# The match is anchored to a literal dot at the end of the name and is
# case-insensitive, in line with the default process_exp. An unanchored match
# would report "mydocs.ppt" as Word because "doc" occurs inside the name, and
# a case-sensitive one would report "REPORT.DOC" as unknown.
sub get_file_type_from_extension {
    my $self = shift (@_);
    my ($file) = @_;

    # check against the various bits of process_exp
    if ($file =~ /\.(?:$word_pe)$/i) {
        return ("doc", "Word");
    }
    if ($file =~ /\.(?:$ppt_pe)$/i) {
        return ("ppt", "PowerPoint");
    }
    return ("unknown", "Unknown");
}
174
175
# Plugin-specific processing of an already created document object.
#
# Arguments: ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli)
#
# Associates the original source file with the document's top section and
# sets the FileFormat, srclink_file and srcicon metadata from the file's
# extension. When OpenOffice conversion is not available there is no
# extracted text, so a warning is emitted and dummy text is added instead.
#
# Always returns 1. Without an explicit return the result would be whatever
# the last evaluated expression happened to yield.
sub process {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $cursection = $doc_obj->get_top_section();

    # store original file as associated file
    my $filename = &util::filename_cat($base_dir, $file);
    my $assocfilename = $doc_obj->get_assocfile_from_sourcefile();
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    my ($ext, $format) = $self->get_file_type_from_extension($file);
    # overwrite the one set by secondary plugin
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $format);

    $doc_obj->add_utf8_metadata($cursection, "srclink_file", "[SourceFile]");
    $doc_obj->add_utf8_metadata($cursection, "srcicon", "_icon".$ext."_");

    # if oo conversion not available, we have no text, so add some
    if (!$self->{'openoffice_ext_working'}) {
        if ($gli) {
            &gsprintf(STDERR, "<Warning p='openOfficePlugin' r='{OpenOfficeConverter.noconversionavailable}: {OpenOfficeConverter.".$self->{'no_openoffice_conversion_reason'}."}'>");
        }
        print STDERR "OpenOfficePlugin: no conversion available, just adding $file as is\n";
        # we have no text - adds dummy text and NoText metadata
        $self->add_dummy_text($doc_obj, $cursection);
    }

    return 1;
}
204
# Earlier version of process(), kept for reference.
#
# Arguments: ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli)
#
# Associates the original file with the top section and records FileFormat,
# srcicon and srclink_file metadata. The associated file is named
# "doc.<ext>" unless 'keep_original_filename' is 1, in which case the
# document's source-file name is used and the link refers to [SourceFile].
# Always returns 1.
sub process_old {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    # need to check that not empty
    my $doc_ext = $self->{'filename_extension'};
    my $file_type = defined $self->{'file_type'} ? $self->{'file_type'} : "unknown";

    # Both the associated file name and the srclink depend on this flag.
    my $keep_name = ($self->{'keep_original_filename'} == 1);
    my $generic_name = "doc.$doc_ext";

    # associate original file with doc object
    my $cursection = $doc_obj->get_top_section();
    my $filename = &util::filename_cat($base_dir, $file);
    # When the original name is kept, this should be the same filename that
    # was used for the Source and SourceFile metadata, as [SourceFile] is
    # used in the srclink.
    my $assocfilename = $keep_name
        ? $doc_obj->get_assocfile_from_sourcefile()
        : $generic_name;
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    # We use set instead of add here because we only want one value
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $file_type);

    my $srclink_filename = $keep_name ? "[SourceFile]" : $generic_name;
    $doc_obj->add_utf8_metadata ($cursection, "srcicon",  "_icon".$doc_ext."_");
    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", $srclink_filename);

    return 1;
}
Note: See TracBrowser for help on using the browser.