source: trunk/gsdl/perllib/plugins/WordPlug.pm@ 4785

Last change on this file since 4785 was 4744, checked in by mdewsnip, 21 years ago

Tidied up the $arguments and $options structures (representing the options of the plugin) in preparation for removing the print_usage() routines.

  • Property svn:keywords set to Author Date Id Revision
File size: 2.6 KB
Line 
1###########################################################################
2#
3# WordPlug.pm -- plugin for importing Microsoft Word documents
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# 12/05/02 Added usage datastructure - John Thompson
27
28package WordPlug;
29
30use ConvertToPlug;
31
# Register ConvertToPlug as the parent class while the file is still
# being compiled, so inherited methods resolve from the start.
BEGIN {
    @ISA = qw(ConvertToPlug);
}
35
# Arguments specific to this plugin. Each entry is a hash with the keys
# name, desc, type, deft (default value) and reqd.
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",
        'type' => "string",
        # The default is taken from the sub defined further down this file;
        # the parentheses let the call parse before that definition is seen.
        'deft' => get_default_process_exp(),
        'reqd' => "no",
    },
];

# Description of the plugin as a whole: its name, a summary, the
# 'inherits' flag and the argument list above.
my $options = {
    'name'     => "WordPlug",
    'desc'     => "A plugin for importing Microsoft Word documents.",
    'inherits' => "yes",
    'args'     => $arguments,
};
47
# Construct a WordPlug object on top of ConvertToPlug.
#
# Arguments: the class name, followed by any further arguments, which are
#            handed on unchanged to the ConvertToPlug constructor.
# Returns:   the plugin object, blessed into the given class.
sub new {
    my $class = shift (@_);

    # Explicit class-method call. The indirect form "new ConvertToPlug (...)"
    # relies on parser heuristics and is ambiguous inside a package that
    # defines its own new(), as this one does.
    my $self = ConvertToPlug->new ($class, @_);

    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
    # Add this plugin's option description to the list held on the object.
    my $option_list = $self->{'option_list'};
    push (@{$option_list}, $options);

    # wvWare will always produce html files encoded as utf-8, so an "auto"
    # input encoding is pinned to utf8 and language extraction switched on.
    if ($self->{'input_encoding'} eq "auto") {
        $self->{'input_encoding'}   = "utf8";
        $self->{'extract_language'} = 1;
    }

    return bless $self, $class;
}
65
# Return the default filename pattern for this plugin: a case-insensitive
# regular expression (as a string) matching names ending in .doc or .dot.
# Any arguments passed in are ignored.
sub get_default_process_exp {
    return '(?i)\.(doc|dot)$';
}
71
# Plugin-specific processing step. When verbosity is above 1, a note about
# the hand-off is written to the object's output handle; the remaining
# arguments are then passed, untouched, to ConvertToPlug::process_type with
# the type tag "doc". Returns whatever process_type returns.
sub process {
    my $self = shift (@_);

    if ($self->{'verbosity'} > 1) {
        my $outhandle = $self->{'outhandle'};
        # $_[3] is the fourth argument after $self -- presumably the file
        # name; confirm against the callers.
        print {$outhandle} "WordPlug: passing $_[3] on to $self->{'converted_to'}Plug\n";
    }

    return ConvertToPlug::process_type ($self, "doc", @_);
}
82
831;
Note: See TracBrowser for help on using the repository browser.