source: gsdl/trunk/perllib/plugins/PowerPointPlugin.pm@ 20790

Last change on this file since 20790 was 20790, checked in by kjdon, 13 years ago

set -processing_tmp_files option to secondary HTML and PagedImage plugins so that the associated files in tmp are not stored as source associated files (used by incremental build to work out what needs reimporting)

  • Property svn:keywords set to Author Date Id Revision
File size: 5.2 KB
Line 
###########################################################################
#
# PowerPointPlugin.pm -- plugin for importing Microsoft PowerPoint files.
# (currently only versions 95 and 97)
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 2002 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
27
28package PowerPointPlugin;
29
30use ConvertBinaryFile;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
# Establish the inheritance chain at compile time: this plugin derives
# from ConvertBinaryFile.
BEGIN {
    @PowerPointPlugin::ISA = qw(ConvertBinaryFile);
}
38
# Choices offered for the convert_to argument. Each entry pairs a choice
# name with its description key, which has the form
# "{ConvertBinaryFile.convert_to.<name>}".
my $convert_to_list = [
    map {
        +{
            'name' => $_,
            'desc' => '{ConvertBinaryFile.convert_to.' . $_ . '}',
        }
    } qw(auto html text pagedimg_jpg pagedimg_gif pagedimg_png)
];
53
# Arguments declared by this plugin. The list starts with process_exp only;
# new() appends further entries to it when running on Windows.
my $arguments = [
    {
        name => 'process_exp',
        desc => '{BasePlugin.process_exp}',
        type => 'regexp',
        reqd => 'no',
        deft => get_default_process_exp(),
    },
];
61
# Option table describing this plugin; new() appends it to the caller's
# "OptList". 'args' shares the $arguments list rather than copying it.
my $options = {
    name           => 'PowerPointPlugin',
    desc           => '{PowerPointPlugin.desc}',
    abstract       => 'no',
    inherits       => 'yes',
    # Source docs in PPT format can be replaced with GS-generated html
    srcreplaceable => 'yes',
    args           => $arguments,
};
68
# Construct a PowerPointPlugin.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the ConvertBinaryFile
#                      constructor
#   $hashArgOptLists - hash ref; this plugin's arguments are appended to its
#                      "ArgList" entry and its option table to "OptList"
#
# Returns the object blessed into $class. When the base constructor sets
# 'info_only', the object is returned straight away without any of the
# conversion / secondary-plugin setup.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # convert_to and windows_scripting are only offered on Windows.
    # $arguments is shared by every instance (and is the list that
    # $options->{'args'} refers to), so the extra entries are appended only
    # once; otherwise every further call to new() would add another copy.
    # The defined() guard avoids matching against an unset GSDLOS.
    if (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows$/i) {
        my $already_added =
            grep { $_->{'name'} eq 'windows_scripting' } @$arguments;
        if (!$already_added) {
            my $ws_arg = [
                { 'name' => "convert_to",
                  'desc' => "{ConvertBinaryFile.convert_to}",
                  'type' => "enum",
                  'reqd' => "yes",
                  'list' => $convert_to_list,
                  'deft' => "html" },
                { 'name' => "windows_scripting",
                  'desc' => "{PowerPointPlugin.windows_scripting}",
                  'type' => "flag",
                  'reqd' => "no" }
            ];
            push(@$arguments, @$ws_arg);
        }
    }

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    my $self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    $self->{'filename_extension'} = "ppt";
    $self->{'file_type'} = "PPT";

    # ppthtml outputs utf-8 already.
    # these are passed through to gsConvert.pl by ConvertBinaryFile.pm
    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    # Windows scripting with a paged-image target uses PagedImagePlugin as
    # the secondary plugin; everything else uses HTMLPlugin and TextPlugin.
    if ($self->{'windows_scripting'} && ($self->{'convert_to'} =~ m/(?:pagedimage|pagedimg)/i)) {
        $secondary_plugin_options->{'PagedImagePlugin'} = [];
    } else {
        $secondary_plugin_options->{'HTMLPlugin'} = [];
        $secondary_plugin_options->{'TextPlugin'} = [];
    }
    my $html_options    = $secondary_plugin_options->{'HTMLPlugin'};
    my $text_options    = $secondary_plugin_options->{'TextPlugin'};
    my $pageimg_options = $secondary_plugin_options->{'PagedImagePlugin'};

    # Options common to whichever secondary plugins are configured.
    foreach my $plugin_options ($html_options, $text_options, $pageimg_options) {
        next unless defined $plugin_options;
        push(@$plugin_options, "-input_encoding", "utf8");
        push(@$plugin_options, "-extract_language") if $self->{'extract_language'};
        push(@$plugin_options, "-file_rename_method", "none");
    }

    if (defined $html_options) {
        push(@$html_options, "-processing_tmp_files");

        # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
        # to extract these metadata fields from the HEAD META fields
        push(@$html_options, "-metadata_fields", "Title,GENERATOR,date,author<Creator>");
    }
    if (defined $pageimg_options) {
        push(@$pageimg_options, "-processing_tmp_files");
    }

    $self = bless $self, $class;

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);
    return $self;
}
146
# Default value for the process_exp argument: a case-insensitive pattern
# matching names that end in ".ppt". Any arguments passed in are ignored;
# the argument table in this file calls it as a plain function.
sub get_default_process_exp {
    return '(?i)\.ppt$';
}
151
1521;
153
Note: See TracBrowser for help on using the repository browser.