source: gsdl/trunk/perllib/plugins/PowerPointPlugin.pm@ 18406

Last change on this file since 18406 was 18406, checked in by ak19, 13 years ago

Modified srcreplaceable plugins (plugins that operate on documents whose source file can be replaced with its converted HTML) to set file_rename_method to none for their secondary plugins (the Text, HTML, and PagedImage plugins), so that the file is not renamed several times.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.1 KB
Line 
###########################################################################
#
# PowerPointPlugin.pm -- plugin for importing Microsoft PowerPoint files.
# (currently only versions 95 and 97)
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 2002 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
27
28package PowerPointPlugin;
29
30use ConvertBinaryFile;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
# Establish inheritance at compile time: PowerPointPlugin is a
# ConvertBinaryFile, so method lookups fall through to that class.
BEGIN {
    @PowerPointPlugin::ISA = ('ConvertBinaryFile');
}
38
# Allowed values for the "convert_to" enum argument.  Each entry pairs the
# option value ('name') with the key of its description string ('desc'),
# which always has the form {ConvertBinaryFile.convert_to.<name>}.
my $convert_to_list = [
    map { +{ 'name' => $_,
             'desc' => '{ConvertBinaryFile.convert_to.' . $_ . '}' } }
        qw(auto html text pagedimg_jpg pagedimg_gif pagedimg_png)
];
53
# Argument specifications this plugin adds on every platform.  The
# constructor appends further Windows-only arguments to this same list.
my $arguments = [
    { 'name' => "process_exp",
      'desc' => "{BasePlugin.process_exp}",
      'type' => "regexp",
      'reqd' => "no",
      # Plain call (no '&' sigil); the sub is defined further down the file,
      # which is fine because named subs are installed at compile time.
      'deft' => get_default_process_exp() }
];
61
# Plugin description record pushed onto the caller's "OptList" by new().
# 'args' refers to the shared $arguments list above (same array, not a copy).
my $options = {
    'name'           => "PowerPointPlugin",
    'desc'           => "{PowerPointPlugin.desc}",
    'abstract'       => "no",
    'inherits'       => "yes",
    # Source docs in PPT format can be replaced with GS-generated html
    'srcreplaceable' => "yes",
    'args'           => $arguments,
};
68
# Constructor.
#
# Parameters:
#   $class           - class name being constructed
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to ConvertBinaryFile->new
#   $hashArgOptLists - hash ref; this plugin's argument specs are appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object produced by ConvertBinaryFile->new, re-blessed into
# $class.  When the object's 'info_only' flag is set it is returned right
# away, before any conversion options or secondary plugins are configured.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # GSDLOS may be unset; test definedness first so the match never runs
    # against undef.
    if (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/i) {
        my $ws_arg = [
            { 'name' => "convert_to",
              'desc' => "{ConvertBinaryFile.convert_to}",
              'type' => "enum",
              'reqd' => "yes",
              'list' => $convert_to_list,
              'deft' => "html" },
            { 'name' => "windows_scripting",
              'desc' => "{PowerPointPlugin.windows_scripting}",
              'type' => "flag",
              'reqd' => "no" }
        ];

        # $arguments is a file-level list shared by every call to new() (and
        # referenced from $options->{'args'}), so only append the specs that
        # are not already there.  Otherwise each additional construction
        # would add another copy of the Windows-only arguments.
        my %already_listed = map { $_->{'name'} => 1 } @$arguments;
        push(@$arguments, grep { !$already_listed{ $_->{'name'} } } @$ws_arg);
    }

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call rather than indirect-object syntax, which
    # Perl has to disambiguate heuristically.
    my $self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    $self->{'filename_extension'} = "ppt";
    $self->{'file_type'} = "PPT";

    # ppthtml outputs utf-8 already.
    # these are passed through to gsConvert.pl by ConvertBinaryFile.pm
    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    # Paged-image output is only selected together with windows_scripting;
    # every other combination goes through the HTML and Text plugins.
    if ($self->{'windows_scripting'} && ($self->{'convert_to'} =~ m/(?:pagedimage|pagedimg)/i)) {
        $secondary_plugin_options->{'PagedImagePlugin'} = [];
    }
    else {
        $secondary_plugin_options->{'HTMLPlugin'} = [];
        $secondary_plugin_options->{'TextPlugin'} = [];
    }

    # Options common to whichever secondary plugins have an option list.
    # file_rename_method is set to none for each of them.
    for my $plugin_name ('HTMLPlugin', 'TextPlugin', 'PagedImagePlugin') {
        my $plugin_options = $secondary_plugin_options->{$plugin_name};
        next unless defined $plugin_options;

        push(@$plugin_options, "-input_encoding", "utf8");
        push(@$plugin_options, "-extract_language") if $self->{'extract_language'};
        push(@$plugin_options, "-file_rename_method", "none");
    }

    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
    if (defined $html_options) {
        # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
        # to extract these metadata fields from the HEAD META fields
        push(@$html_options, "-metadata_fields", "Title,GENERATOR,date,author<Creator>");
    }

    $self = bless $self, $class;

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);
    return $self;
}
143
# Returns the default value for the "process_exp" argument: a regular
# expression, as a string, that matches names ending in ".ppt" in any
# letter case.  Takes no meaningful arguments; anything passed is ignored.
sub get_default_process_exp {
    return q{(?i)\.ppt$};
}

1;
150
Note: See TracBrowser for help on using the repository browser.