source: trunk/gsdl/perllib/plugins/ConvertToPlug.pm@ 1435

Last change on this file since 1435 was 1435, checked in by davidb, 24 years ago

Rearrangement of ConvertTo inheritance so HTMLPlug and TextPlug do not need
to know anything about the conversion process.

  • Property svn:keywords set to Author Date Id Revision
File size: 6.6 KB
Line 
1###########################################################################
2#
3# ConvertToPlug.pm -- plugin that inherits from HTML or TEXT Plug, depending
4# on plugin argument convert_to
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# The plugin is inherited by such plugins as WordPlug and PDFPlug.
29# It facilitates the conversion of these document types to either HTML
30# or TEXT by setting up variables that instruct ConvertToBasPlug
31# how to work.
32
33# It works by dynamically inheriting HTMLPlug or TEXTPlug based on
34# the plugin argument 'convert_to'. If the argument is not present,
35# the default is to inherit HTMLPlug.
36
37
38package ConvertToPlug;
39
40use HTMLPlug;
41use TEXTPlug;
42
# Set up inheritance at compile time: both target-format plugins are
# listed as parents, HTMLPlug first.
BEGIN {
    @ISA = qw(HTMLPlug TEXTPlug);
}
46
47use strict;
48
# Write the usage summary for a plugin to STDERR.
#
#   $plugin_name - name shown on the "usage:" line
#
# Returns the result of the final print.
sub print_usage {
    my $name = shift;

    my @usage_lines = (
        "\n usage: plugin $name [options]\n\n",
        " options:\n",
        " -convert_to (html|text) plugin converts to TEXT or HTML\n",
        " (default html)\n",
    );

    # One print per line, exactly as many writes as there are lines.
    my $printed;
    $printed = print STDERR $_ for @usage_lines;

    return $printed;
}
57
# Pull the -convert_to option out of the plugin argument list.
#
#   $class - plugin class name; a trailing ".pm" is stripped to form the
#            plugin name used in messages
#   $args  - reference to the argument array, handed to parsargv::parse
#
# Returns the list ($plugin_name, $generate_format).
# When parsargv::parse reports failure, an error and the usage summary are
# printed to STDERR and the sub dies with "\n".
sub parse_args
{
    my ($class, $args) = @_;

    (my $plugin_name = $class) =~ s/\.pm$//;

    my $generate_format;
    my $parsed_ok = parsargv::parse($args,
                                    q^convert_to/(html|text)/html^, \$generate_format,
                                    "allow_extra_options");

    unless ($parsed_ok) {
        print STDERR "\nIncorrect options passed to $plugin_name, ";
        print STDERR "check your collect.cfg configuration file\n";
        &print_usage($plugin_name);
        die "\n";
    }

    return ($plugin_name, $generate_format);
}
79
# Constructor: builds the object through TEXTPlug or HTMLPlug depending on
# the -convert_to argument, records the chosen target format on the object
# and re-blesses it into $class.
#
#   $class - class being constructed
#   @_     - remaining plugin arguments; a reference to them goes to
#            parse_args and the list itself to the base constructor
#
# Fields set on the object:
#   convert_to     - "TEXT" or "HTML"
#   convert_to_ext - "txt"  or "html"
sub new {
    my $class = shift;

    my (undef, $generate_format) = $class->parse_args(\@_);

    # Anything other than "text" selects the HTML route.
    my ($base_plugin, $target_format, $target_ext)
        = ($generate_format eq "text") ? ('TEXTPlug', "TEXT", "txt")
                                       : ('HTMLPlug', "HTML", "html");

    my $self = $base_plugin->new($class, @_);
    $self->{'convert_to'}     = $target_format;
    $self->{'convert_to_ext'} = $target_ext;

    return bless $self, $class;
}
102
103
104
# Run the conversion utility on the input file. Output files are generated
# in the collection specific 'tmp' directory.
# The collection tmp area is used to convert Word, PDF etc documents into
# another format (such as text or HTML) suitable for reading and indexing.
#
#   $output_ext     - extension of the target format; also selects the
#                     converter script, which is named "gs2<ext>.pl"
#   $input_filename - path of the document to convert
#   $textref        - accepted for interface compatibility; not used here
#
# Returns the path the converted file is expected at inside the tmp
# directory. A failed conversion only produces a warning on STDERR; the
# path is returned regardless.
sub tmp_area_convert_file {
    my $self = shift (@_);
    my ($output_ext, $input_filename, $textref) = @_;

    # locate (and if necessary create) the collection tmp dir
    my $colname = &util::use_collection();
    my $tmp_dirname
        = &util::filename_cat($ENV{'GSDLHOME'},"collect",$colname,"tmp");
    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);

    # derive tmp filename from input filename
    my ($tailname,$dirname,$suffix)
        = File::Basename::fileparse($input_filename,'\..+');

    # Remove any white space from the file name -- no risk of name
    # collision, and makes later conversion by utils simpler. Only the
    # file name is touched: stripping the whole path would corrupt a
    # GSDLHOME or collection directory that itself contains white space.
    my $link_name = "$tailname$suffix";
    $link_name =~ s/\s+//g;
    my $tmp_filename = &util::filename_cat($tmp_dirname,$link_name);

    &util::soft_link($input_filename,$tmp_filename);

    my $output_name = "$tailname.$output_ext";
    $output_name =~ s/\s+//g;
    my $output_filename = &util::filename_cat($tmp_dirname,$output_name);

    # an unset verbosity must not leave "-verbose" without a value
    my $verbosity = $self->{'verbosity'};
    $verbosity = 0 unless defined $verbosity;
    if ($verbosity > 0) {
        print STDERR "Converting $tailname$suffix to $self->{'convert_to'}\n";
    }

    # Run either gs2txt.pl or gs2html.pl to perform the requested
    # conversion. The list form of system passes the file name as a single
    # argument, so quotes or shell metacharacters in it are harmless.
    # Any non-zero status is a failure, including the -1 returned when the
    # converter could not be started at all.
    my $status = system("gs2$output_ext.pl", "-verbose", $verbosity, $tmp_filename);
    if ($status != 0) {
        print STDERR "Warning: unable to convert to $output_ext format\n";
    }

    # remove symbolic link to original file
    &util::rm($tmp_filename);

    return $output_filename;
}
152
153
# Empty the collection specific tmp directory: the directory is removed
# together with everything in it and then created again.
#
# Returns whatever util::mk_dir returns.
sub cleanup_tmp_area {
    my ($self) = @_;

    my $collection = &util::use_collection();
    my $tmp_area
        = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "tmp");

    # wipe first, then recreate an empty directory in the same place
    &util::rm_r($tmp_area);
    return &util::mk_dir($tmp_area);
}
165
166
167
# Override BasPlug read_file: convert the source document first, then let
# BasPlug::read_file read the converted file instead of the original.
#
#   $src_filename - path of the original document
#   $textref      - passed through to BasPlug::read_file
#
# The converted file's path is remembered in $self->{'conv_filename'}.
sub read_file {
    my ($self, $src_filename, $textref) = @_;

    my $conv_filename
        = $self->tmp_area_convert_file($self->{'convert_to_ext'}, $src_filename);
    $self->{'conv_filename'} = $conv_filename;

    return BasPlug::read_file($self, $conv_filename, $textref);
}
180
181
# Override BasPlug read: delegate to BasPlug::read with the arguments
# unchanged, then clear out the collection tmp area.
#
# Returns the value BasPlug::read returned.
sub read {
    my $self = shift;

    my $result = BasPlug::read($self, @_);

    # tidy up after every call
    $self->cleanup_tmp_area();

    return $result;
}
193
194
# Do the plugin specific processing of doc_obj for a converted document.
#
# The converted file recorded in $self->{'conv_filename'} is handed to
# TEXTPlug::process when $self->{'convert_to'} is "TEXT" and to
# HTMLPlug::process otherwise. The original source file is then associated
# with the document as "doc.<doc_ext>", and srclink / srcicon / /srclink
# metadata is added to the top section.
#
#   $doc_ext   - extension of the original document type
#   $textref, $pluginfo, $metadata, $doc_obj - passed on to the process sub
#   $base_dir, $file - together give the location of the original file
#
# Returns the value of the TEXTPlug/HTMLPlug process call.
sub process_type {
    my ($self, $doc_ext, $textref, $pluginfo,
        $base_dir, $file, $metadata, $doc_obj) = @_;

    # the process sub is given the converted file, split into dir and name
    my $conv_filename = $self->{'conv_filename'};
    my $tmp_dirname   = File::Basename::dirname($conv_filename);
    my $tmp_tailname  = File::Basename::basename($conv_filename);

    my $process = ($self->{'convert_to'} eq "TEXT")
        ? \&TEXTPlug::process
        : \&HTMLPlug::process;

    my $ret_val = $process->($self, $textref, $pluginfo,
                             $tmp_dirname, $tmp_tailname,
                             $metadata, $doc_obj);

    # associate original file with doc object
    my $top_section   = $doc_obj->get_top_section();
    my $original_file = &util::filename_cat($base_dir, $file);
    $doc_obj->associate_file($original_file, "doc.$doc_ext", undef, $top_section);

    my $doclink = "<a href=_httpcollection_/index/assoc/[archivedir]/doc.$doc_ext>";
    my @source_metadata = (
        [ "srclink",  $doclink ],
        [ "srcicon",  "_icon".$doc_ext."_" ],
        [ "/srclink", "</a>" ],
    );
    for my $pair (@source_metadata) {
        $doc_obj->add_utf8_metadata ($top_section, @$pair);
    }

    return $ret_val;
}
232
2331;
Note: See TracBrowser for help on using the repository browser.