source: gsdl/trunk/perllib/plugins/ExcelPlugin.pm@ 20790

Last change on this file since 20790 was 20790, checked in by kjdon, 13 years ago

set -processing_tmp_files option to secondary HTML and PagedImage plugins so that the associated files in tmp are not stored as source associated files (used by incremental build to work out what needs reimporting)

  • Property svn:keywords set to Author Date Id Revision
File size: 3.9 KB
Line 
1###########################################################################
2#
3# ExcelPlugin.pm -- plugin for importing Microsoft Excel files.
4# (currently only versions 95 and 97)
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2002 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package ExcelPlugin;
29
30use ConvertBinaryFile;
31use strict;
32no strict 'refs'; # allow filehandles to be variables and viceversa
33
# Compile-time setup: declare ConvertBinaryFile as the parent class of
# ExcelPlugin so that method lookups fall through to it.
BEGIN {
    @ExcelPlugin::ISA = qw(ConvertBinaryFile);
}
37
# Argument descriptions contributed by this plugin. The only entry is
# process_exp, whose default comes from get_default_process_exp().
my $arguments = [
    {
        name => 'process_exp',
        desc => '{BasePlugin.process_exp}',
        type => 'regexp',
        reqd => 'no',
        deft => get_default_process_exp(),
    },
];

# Plugin description record; new() appends it to the caller's "OptList".
my $options = {
    name           => 'ExcelPlugin',
    desc           => '{ExcelPlugin.desc}',
    abstract       => 'no',
    inherits       => 'yes',
    # Source docs in Excel format can be replaced with GS-generated html
    srcreplaceable => 'yes',
    args           => $arguments,
};
52
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; the class name is appended to it
#   $inputargs       - handed unchanged to the ConvertBinaryFile constructor
#   $hashArgOptLists - hash ref; this plugin's argument and option
#                      descriptions are appended to its "ArgList" and
#                      "OptList" entries
#
# Returns the object built by the ConvertBinaryFile constructor, blessed
# into $class (so subclasses that inherit this constructor get an object of
# their own class).
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Direct method-call syntax instead of the ambiguous indirect-object
    # form "new ConvertBinaryFile(...)".
    my $self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    $self->{'filename_extension'} = "xls";
    $self->{'file_type'} = "Excel";

    # Make sure both secondary plugins have an option list to append to.
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
        $secondary_plugin_options->{'HTMLPlugin'} = [];
    }
    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
        $secondary_plugin_options->{'TextPlugin'} = [];
    }
    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
    my $text_options = $secondary_plugin_options->{'TextPlugin'};

    # Options for the secondary HTML plugin.
    push(@$html_options, "-input_encoding", "utf8");
    push(@$html_options, "-extract_language") if $self->{'extract_language'};
    push(@$html_options, "-file_rename_method", "none");
    # Per the change log for this revision: keeps the associated files in
    # tmp from being stored as source associated files.
    push(@$html_options, "-processing_tmp_files");

    # Options for the secondary text plugin.
    push(@$text_options, "-input_encoding", "utf8");
    push(@$text_options, "-extract_language") if $self->{'extract_language'};
    push(@$text_options, "-file_rename_method", "none");

    # Bless before loading the secondary plugins so the method call below
    # is dispatched through $class.
    $self = bless $self, $class;

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);

    # $self is already blessed into $class. The previous one-argument
    # "bless $self" re-blessed it into this package, which gave subclasses
    # an object of the wrong class.
    return $self;
}
95
# Legacy post-conversion hook (not the active one, going by its "_old"
# suffix). It detects the language and encoding of the converted file and
# reads the file into a local buffer; the steps that would have fixed up and
# rewritten the file are disabled, so the buffer is simply discarded.
#
# Arguments:
#   $conv_filename - path of the converted file to inspect
#
# Returns whatever $self->read_file returns.
sub convert_post_process_old
{
    my ($self, $conv_filename) = @_;

    # Ask textcat which language/encoding the converted file uses.
    my ($detected_language, $detected_encoding) =
        $self->textcat_get_language_encoding($conv_filename);

    # Disabled follow-up steps, kept for reference:
    #   unicode::ensure_utf8(\$contents);   # turn invalid high bytes into utf-8
    #   $self->utf8_write_file(\$contents, $conv_filename);   # write it out again

    # Read the file in (the original note says the text will be in utf8).
    my $contents = "";
    return $self->read_file($conv_filename, $detected_encoding,
                            $detected_language, \$contents);
}
115
# Default value for the process_exp argument: a regular expression (as a
# string) matching file names that end in ".xls", case-insensitively.
# The invocant, if any, is accepted but not used.
sub get_default_process_exp {
    my ($self) = @_;
    return q{(?i)\.xls$};
}
120
121
1221;
Note: See TracBrowser for help on using the repository browser.