source: main/trunk/greenstone2/perllib/plugins/ExcelPlugin.pm@ 32341

Last change on this file since 32341 was 31492, checked in by kjdon, 7 years ago

renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes

  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1###########################################################################
2#
3# ExcelPlugin.pm -- plugin for importing Microsoft Excel files.
4# (basic version supports versions 95 and 97)
5# (through OpenOffice extension, supports all contemporary formats)
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2002 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29package ExcelPlugin;
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33no strict 'subs';
34use gsprintf 'gsprintf';
35
36use AutoLoadConverters;
37use ConvertBinaryFile;
38
# Establish the inheritance chain at compile time: ConvertBinaryFile is
# searched first, then AutoLoadConverters.
BEGIN {
    @ExcelPlugin::ISA = qw(ConvertBinaryFile AutoLoadConverters);
}
42
# File-level flag; new() switches it to 1 when the AutoLoadConverters object
# it builds reports 'openoffice_available'. Read by get_default_process_exp().
my $openoffice_available = 0;

# Argument descriptions contributed by this plugin.
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BaseImporter.process_exp}',
        'type' => 'regexp',
        'reqd' => 'no',
        # Delayed default: this is a code string, resolved inside new() once
        # it is known whether OpenOffice is available.
        'deft' => '&get_default_process_exp()',
    },
];

# Plugin description record that new() pushes onto the shared "OptList".
my $options = {
    'name'           => 'ExcelPlugin',
    'desc'           => '{ExcelPlugin.desc}',
    'abstract'       => 'no',
    'inherits'       => 'yes',
    # Source docs in Excel format can be replaced with GS-generated html
    'srcreplaceable' => 'yes',
    'args'           => $arguments,
};
60
# Constructor.
#
# Parameters:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to both parent constructors
#   $hashArgOptLists - hash ref whose "OptList" and "ArgList" arrays receive
#                      this plugin's option record and argument descriptions
#
# Returns the blessed plugin object. When the merged object has 'info_only'
# set, it is returned straight after blessing, with no conversion settings
# and no secondary plugins loaded.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # The argument list is pushed later, once the delayed default below has
    # been resolved. The option record goes in first, to get the print info
    # in the right order.
    push(@{$hashArgOptLists->{"OptList"}},$options);

    my $auto_converter_self = AutoLoadConverters->new($pluginlist,$inputargs,$hashArgOptLists,["OpenOfficeConverter"],1);

    if ($auto_converter_self->{'openoffice_available'}) {
        $openoffice_available = 1;
    }

    # Resolve the default for process_exp. It has to be delayed till here so
    # we know whether openoffice is available, but must be done before the
    # args are parsed.
    #
    # The helper is called directly rather than string-eval'ing the stored
    # placeholder: $arguments is shared file-level state, so on a second
    # construction the stored value is already the regex text, and evaluating
    # that as Perl code would fail and reset the default to undef.
    foreach my $arg (@$arguments) {
        if ($arg->{'name'} eq "process_exp") {
            $arg->{'deft'} = get_default_process_exp();
            last;
        }
    }

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    my $cbf_self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    # Combine the two parent-constructed objects into one.
    my $self = BaseImporter::merge_inheritance($auto_converter_self, $cbf_self);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    $self = bless $self, $class;
    $self->{'file_type'} = "Excel";

    # check convert_to: "auto" means html for this plugin
    if ($self->{'convert_to'} eq "auto") {
        $self->{'convert_to'} = "html";
    }

    # set convert_to_plugin and convert_to_ext
    $self->set_standard_convert_settings();

    my $secondary_plugin_name = $self->{'convert_to_plugin'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    # Make sure the secondary plugin has an option list to append to.
    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
        $secondary_plugin_options->{$secondary_plugin_name} = [];
    }
    my $specific_options = $secondary_plugin_options->{$secondary_plugin_name};

    push(@$specific_options,"-extract_language") if $self->{'extract_language'};
    push(@$specific_options, "-file_rename_method", "none");

    if ($secondary_plugin_name eq "HTMLPlugin") {
        push(@$specific_options, "-processing_tmp_files");
    }

    $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
    return $self;
}
128
129
# Returns the default process_exp regular expression as a string.
# When the file-level $openoffice_available flag is true the pattern matches
# .xls, .xlsx and .ods; otherwise it matches .xls only. Any arguments passed
# in are ignored.
sub get_default_process_exp {
    return $openoffice_available
        ? q^(?i)\.(xls|xlsx|ods)$^
        : q^(?i)\.xls$^;
}
139
# Runs both parents' init with the same arguments: first the one reached via
# SUPER (ConvertBinaryFile comes first in @ISA), then AutoLoadConverters'.
sub init {
    my $self = shift;

    $self->SUPER::init(@_);
    $self->AutoLoadConverters::init(@_);
}
148
# Runs both parents' begin with the same arguments: AutoLoadConverters' first,
# then the one reached via SUPER.
sub begin {
    my $self = shift;

    $self->AutoLoadConverters::begin(@_);
    $self->SUPER::begin(@_);
}
156
# Runs both parents' deinit with the same arguments: AutoLoadConverters'
# first, then the one reached via SUPER.
sub deinit {
    my $self = shift;

    $self->AutoLoadConverters::deinit(@_);
    $self->SUPER::deinit(@_);
}
164
# Explicitly dispatches to AutoLoadConverters' tmp_area_convert_file, passing
# the arguments through and returning its result.
sub tmp_area_convert_file {
    my $self = shift;

    return $self->AutoLoadConverters::tmp_area_convert_file(@_);
}
171
# Old post-conversion step (note the "_old" suffix).
#
# Parameters:
#   $conv_filename - path of the converted file to inspect
#
# Asks textcat_get_language_encoding for the file's language and encoding,
# then reads the file into a local buffer with read_file. The steps that
# would have fixed up the text and written it back are commented out, so the
# buffer is discarded. The value of the read_file call is what the sub returns.
sub convert_post_process_old {
    my ($self, $conv_filename) = @_;

    my ($language, $encoding) =
        $self->textcat_get_language_encoding($conv_filename);

    # turn any high bytes that aren't valid utf-8 into utf-8.
    #unicode::ensure_utf8(\$text);

    # Write it out again!
    #$self->utf8_write_file (\$text, $conv_filename);

    # read in file ($text will be in utf8); kept as the final statement so
    # the sub's return value stays the result of read_file
    my $text = "";
    $self->read_file($conv_filename, $encoding, $language, \$text);
}
191
1921;
Note: See TracBrowser for help on using the repository browser.