source: trunk/gsdl/perllib/plugins/TEXTPlug.pm@ 4744

Last change on this file since 4744 was 4744, checked in by mdewsnip, 21 years ago

Tidied up the $arguments and $options structures (representing the options of the plugin) in preparation for removing the print_usage() routines.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1###########################################################################
2#
3# TEXTPlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document. Adds Title metadata
27# of first line of text (up to 100 characters long).
28
29# 12/05/02 Added usage datastructure - John Thompson
30
31package TEXTPlug;
32
33use BasPlug;
34use parsargv;
35
36
# Set up inheritance at compile time: this package derives from BasPlug.
BEGIN {
    our @ISA = qw(BasPlug);
}
40
# Descriptions of the options this plugin declares, one hash per option.
# Each entry gives the option name, a help text ('desc'), a value type,
# a 'deft' value (presumably the default) and a 'reqd' flag (presumably
# whether the option is mandatory).
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",
        'type' => "string",
        # Evaluated once, when this file is loaded.
        'deft' => get_default_process_exp(),
        'reqd' => "no",
    },
    {
        'name' => "title_sub",
        'desc' => "Substitution expression to modify string stored as Title. Used by, for example, PSPlug to remove \"Page 1\" etc from text used as the title.",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
    },
];
52
# Plugin-level option record: the plugin's name and description plus the
# argument list declared in $arguments. The constructor appends this record
# to the 'option_list' of the object it builds.
my $options = {
    'name'     => "TEXTPlug",
    'desc'     => "Creates simple single-level document. Adds Title metadata of first line of text (up to 100 characters long).",
    'inherits' => "yes",
    'args'     => $arguments,
};
57
# Write the usage message for this plugin to STDERR.
# Takes no arguments; returns the result of the final print.
sub print_usage {
    # Each element is written with its own print call, in order.
    my @usage_chunks = (
        "\n usage: plugin TEXTPlug [options]\n\n",
        " options:\n",
        " -title_sub\t Substitution expression to modify string stored as Title.\n",
        "\t\t Used by, for example, PSPlug to remove \"Page 1\" etc from\n",
        "\t\t text used as the title.\n",
        "\n",
    );

    my $print_status;
    foreach my $chunk (@usage_chunks) {
        $print_status = print STDERR $chunk;
    }
    return $print_status;
}
67
# Constructor.
#
# Builds the object through BasPlug's constructor, registers this plugin's
# option record on the object's 'option_list', and reads the -title_sub
# option from the argument list into $self->{'title_sub'}.
#
# Dies (after printing the usage message to STDERR) when parsargv::parse
# reports that the arguments could not be parsed.
sub new {
    my ($class) = @_;

    # @_ is passed on in full, so the class name appears twice in the
    # argument list handed to BasPlug; that calling convention is kept.
    my $self = BasPlug->new($class, @_);

    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
    my $inherited_options = $self->{'option_list'};
    push @{$inherited_options}, $options;

    # "allow_extra_options" is passed so that options meant for other
    # classes are presumably tolerated rather than rejected - TODO confirm
    # against parsargv.
    my $args_ok = parsargv::parse(
        \@_,
        q^title_sub/.*/^, \$self->{'title_sub'},
        "allow_extra_options"
    );

    if (!$args_ok) {
        print STDERR "\nIncorrect options passed to TEXTPlug, check your collect.cfg configuration file\n";
        print_usage();
        die "\n";
    }

    return bless $self, $class;
}
87
# Return the default filename pattern for this plugin, as a regular
# expression string: names ending in ".txt" or ".text", case-insensitively.
# Any arguments (including an invocant) are ignored.
sub get_default_process_exp {
    return q{(?i)\.te?xt$};
}
93
# Do the plugin-specific processing of $doc_obj for one plain-text document.
#
# Arguments (after $self):
#   $textref   - reference to the document text; the referenced string is
#                escaped IN PLACE before being added to the document
#   $pluginfo  - not used here
#   $base_dir  - not used here
#   $file      - file name, used only in the verbose progress message
#   $metadata  - hash ref; when it already holds a defined 'Title', no title
#                is derived from the text
#   $doc_obj   - document object that receives the Title metadata and text
#
# Reads $self->{'outhandle'}, $self->{'verbosity'} and $self->{'title_sub'}.
# Returns 1.
sub process {
    my $self = shift(@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    my $outhandle = $self->{'outhandle'};

    print {$outhandle} "TEXTPlug: processing $file\n"
        if $self->{'verbosity'} > 1;

    my $cursection = $doc_obj->get_top_section();

    # get title metadata
    # (don't need to get title if it has been passed
    # in from another plugin)
    if (!defined $metadata->{'Title'}) {
        my $title = $$textref;

        # Strip leading whitespace so that an anchored title_sub expression
        # is applied to the first visible text. (This used to be a bare
        # match whose result was discarded, i.e. it did nothing.)
        $title =~ s/^\s+//;

        # title_sub is a regular expression taken from the plugin options;
        # its first match is removed from the candidate title.
        if (defined $self->{'title_sub'} && $self->{'title_sub'}) {
            $title =~ s/$self->{'title_sub'}//;
        }

        # Keep only the first line that follows any leading whitespace.
        # The pattern can match the empty string, so it always succeeds.
        ($title) = $title =~ /^\s*([^\n]*)/;

        if (length($title) > 100) {
            $title = substr($title, 0, 100) . "...";
        }

        # Escape brackets and angle brackets as numeric character
        # references. '[' and ']' each get their own entity; previously
        # every '[' was turned into ']' and ']' was left untouched.
        $title =~ s/\[/&#91;/g;
        $title =~ s/\]/&#93;/g;
        $title =~ s/</&#60;/g;
        $title =~ s/>/&#62;/g;

        $doc_obj->add_utf8_metadata($cursection, "Title", $title);
    }

    # we need to escape the escape character, or else mg will convert into
    # eg literal newlines, instead of leaving the text as '\n'
    $$textref =~ s/\\/\\\\/g;    # macro language
    $$textref =~ s/_/\\_/g;      # macro language
    $$textref =~ s/</&lt;/g;
    $$textref =~ s/>/&gt;/g;

    # insert preformat tags and add text to document object
    $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");

    return 1;
}
136
1371;
138
139
140
141
142
143
144
145
146
147
148
Note: See TracBrowser for help on using the repository browser.