source: gsdl/trunk/perllib/plugins/PPTPlugin.pm@ 15880

Last change on this file since 15880 was 15872, checked in by kjdon, 16 years ago

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1###########################################################################
2#
3# PPTPlugin.pm -- plugin for importing Microsoft PowerPoint files.
4# (currently only versions 95 and 97)
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2002 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package PPTPlugin;
29
30use ConvertBinaryFile;
31use ReadTextFile; # for read_file in convert_post_process. do we need it?
32
33use strict;
34no strict 'refs'; # allow filehandles to be variables and viceversa
35
# Set up inheritance at compile time: PPTPlugin derives from
# ConvertBinaryFile first, then ReadTextFile.
BEGIN {
    @PPTPlugin::ISA = qw(ConvertBinaryFile ReadTextFile);
}
39
# Allowed values for the "convert_to" enum argument. Each entry is a
# { name, desc } record; the table below lists them as [ name, desc ] pairs
# in the order they are offered.
my $convert_to_list = [
    map { +{ 'name' => $_->[0], 'desc' => $_->[1] } } (
        [ "auto",         "{ConvertBinaryFile.convert_to.auto}" ],
        [ "html",         "{ConvertBinaryFile.convert_to.html}" ],
        [ "text",         "{ConvertBinaryFile.convert_to.text}" ],
        [ "pagedimg_jpg", "{ConvertBinaryFile.convert_to.pagedimg_jpg}" ],
        [ "pagedimg_gif", "{ConvertBinaryFile.convert_to.pagedimg_gif}" ],
        [ "pagedimg_png", "{ConvertBinaryFile.convert_to.pagedimg_png}" ],
    )
];
54
# Argument definitions for this plugin. This list is shared: it is referenced
# by $options->{'args'} and is appended to by new() on Windows.
my $arguments = [];
push(@$arguments, {
    'name' => "process_exp",
    'desc' => "{BasePlugin.process_exp}",
    'type' => "regexp",
    'reqd' => "no",
    'deft' => get_default_process_exp(),
});
62
# Plugin description record; new() pushes it onto the caller's "OptList".
# 'args' points at the shared $arguments list rather than a copy.
my $options = {
    'name'           => "PPTPlugin",
    'desc'           => "{PPTPlugin.desc}",
    'abstract'       => "no",
    'inherits'       => "yes",
    # Source docs in PPT format can be replaced with GS-generated html
    'srcreplaceable' => "yes",
    'args'           => $arguments,
};
69
# Constructor.
#
# Parameters (after the class name):
#   $pluginlist      - array ref; this class name is appended to it.
#   $inputargs       - passed through unchanged to ConvertBinaryFile->new.
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" array refs
#                      receive this plugin's argument and option definitions.
#
# Returns the object created by ConvertBinaryFile->new, re-blessed into
# $class. When the object's 'info_only' field is set, it is returned
# immediately without any further configuration.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # On Windows two extra arguments are offered. They are appended to the
    # file-level $arguments list (which $options->{'args'} also refers to),
    # so only append them the first time through; otherwise every further
    # call to new() would add duplicate definitions to the shared list.
    my $on_windows = defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/i;
    my $already_added = grep { $_->{'name'} eq "convert_to" } @$arguments;
    if ($on_windows && !$already_added) {
        my $ws_arg = [ { 'name' => "convert_to",
                         'desc' => "{ConvertBinaryFile.convert_to}",
                         'type' => "enum",
                         'reqd' => "yes",
                         'list' => $convert_to_list,
                         'deft' => "html" },
                       { 'name' => "windows_scripting",
                         'desc' => "{PPTPlugin.windows_scripting}",
                         'type' => "flag",
                         'reqd' => "no" }
                     ];
        push(@$arguments,@$ws_arg);
    }

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit class-method call: this package defines its own new(), so the
    # indirect-object form "new ConvertBinaryFile(...)" is best avoided.
    my $self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    $self->{'filename_extension'} = "ppt";
    $self->{'file_type'} = "PPT";

    # ppthtml outputs utf-8 already.
    # these are passed through to gsConvert.pl by ConvertBinaryFile.pm
    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    # NOTE(review): the convert_to values declared in this file are auto,
    # html, text and pagedimg_{jpg,gif,png}; "PagedImg" is not one of them.
    # Unless ConvertBinaryFile rewrites 'convert_to', this test never
    # succeeds and HTMLPlugin is always selected -- TODO confirm.
    if ($self->{'windows_scripting'} && ($self->{'convert_to'} eq "PagedImg")) {
        $secondary_plugin_options->{'PagedImagePlugin'} = [];
    } else {
        $secondary_plugin_options->{'HTMLPlugin'} = [];
    }
    # Either of these may be undef if that secondary plugin is not configured;
    # each is only dereferenced below after a matching defined() check.
    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
    my $pageimg_options = $secondary_plugin_options->{'PagedImagePlugin'};

    if ($self->{'input_encoding'} eq "auto") {
        $self->{'input_encoding'} = "utf8";
        if (defined $secondary_plugin_options->{'HTMLPlugin'}) {
            push(@$html_options,"-input_encoding", "utf8");
            push(@$html_options,"-extract_language") if $self->{'extract_language'};

            # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
            # to extract these metadata fields from the HEAD META fields
            push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
        }
        if (defined $secondary_plugin_options->{'PagedImagePlugin'}) {
            push(@$pageimg_options,"-input_encoding", "utf8");
            push(@$pageimg_options,"-extract_language") if $self->{'extract_language'};
        }
    }

    $self = bless $self, $class;

    $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
    return $self;
}
138
# Default value for the "process_exp" argument: a case-insensitive pattern
# matching names that end in ".ppt". Any arguments passed in are ignored.
sub get_default_process_exp {
    return '(?i)\.ppt$';
}
143
# Rewrite the converted file in place as utf-8.
#
# Parameters:
#   $conv_filename - path of the converted file; it is read and then
#                    written back to the same path.
#
# Returns whatever utf8_write_file returns (it is the last statement).
#
# do we need this? new() notes that ppthtml produces utf8 text already...
sub convert_post_process
{
    my ($self, $conv_filename) = @_;

    # Ask textcat_get_language_encoding for the file's language and encoding.
    my ($language, $encoding) =
        $self->textcat_get_language_encoding($conv_filename);

    # read in file ($content will be in utf8)
    my $content = "";
    $self->read_file($conv_filename, $encoding, $language, \$content);

    # turn any high bytes that aren't valid utf-8 into utf-8.
    unicode::ensure_utf8(\$content);

    # Write it out again!
    $self->utf8_write_file(\$content, $conv_filename);
}
161
162
1631;
164
Note: See TracBrowser for help on using the repository browser.