source: gsdl/trunk/perllib/plugins/TextPlugin.pm@ 15918

Last change on this file since 15918 was 15872, checked in by kjdon, 16 years ago

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Alos, I haven't really tested anything yet...

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.2 KB
Line 
1###########################################################################
2#
3# TextPlugin.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document. Adds Title metadata
27# of first line of text (up to 100 characters long).
28
29# 12/05/02 Added usage datastructure - John Thompson
30
31package TextPlugin;
32
33use ReadTextFile;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37no strict 'subs';
38
39sub BEGIN {
40 @TextPlugin::ISA = ('ReadTextFile');
41}
42
43my $arguments =
44 [ { 'name' => "process_exp",
45 'desc' => "{BasePlugin.process_exp}",
46 'type' => "regexp",
47 'deft' => &get_default_process_exp(),
48 'reqd' => "no" } ,
49 { 'name' => "title_sub",
50 'desc' => "{TextPlugin.title_sub}",
51 'type' => "regexp",
52 'deft' => "",
53 'reqd' => "no" } ];
54
55my $options = { 'name' => "TextPlugin",
56 'desc' => "{TextPlugin.desc}",
57 'abstract' => "no",
58 'inherits' => "yes",
59 'srcreplaceable' => "yes", # Source docs in regular txt format can be replaced with GS-generated html
60 'args' => $arguments };
61
62
63sub new {
64 my ($class) = shift (@_);
65 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
66 push(@$pluginlist, $class);
67
68 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
69 push(@{$hashArgOptLists->{"OptList"}},$options);
70
71 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
72
73 return bless $self, $class;
74}
75
76sub get_default_process_exp {
77 my $self = shift (@_);
78
79 return q^(?i)\.te?xt$^;
80}
81
82# do plugin specific processing of doc_obj
83sub process {
84 my $self = shift (@_);
85 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
86 my $outhandle = $self->{'outhandle'};
87
88 print STDERR "<Processing n='$file' p='TextPlugin'>\n" if ($gli);
89 print $outhandle "TextPlugin xx: processing $file\n"
90 if $self->{'verbosity'} > 1;
91
92 my $cursection = $doc_obj->get_top_section();
93 # get title metadata
94 # (don't need to get title if it has been passed
95 # in from another plugin)
96 if (!defined $metadata->{'Title'}) {
97 my ($title) = $$textref;
98 $title =~ /^\s+/s;
99 if (defined $self->{'title_sub'} &&
100 $self->{'title_sub'}) {$title =~ s/$self->{'title_sub'}//;}
101 $title =~ /^\s*([^\n]*)/s; $title=$1;
102 if (length($title) > 100) {
103 $title = substr ($title, 0, 100) . "...";
104 }
105 $title =~ s/\[/&#91;/g;
106 $title =~ s/\[/&#93;/g;
107 $title =~ s/\</&#60;/g;
108 $title =~ s/\>/&#62;/g;
109 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
110 }
111 # Add FileFormat metadata
112 $doc_obj->add_metadata($cursection, "FileFormat", "Text");
113
114 # insert preformat tags and add text to document object
115 $self->text_to_html($textref); # modifies the text
116 $doc_obj->add_utf8_text($cursection, $$textref); #$doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
117
118 return 1;
119}
120
121sub text_to_html {
122 my $self = shift (@_);
123 my ($textref) = @_;
124
125 # we need to escape the escape character, or else mg will convert into
126 # eg literal newlines, instead of leaving the text as '\n'
127 $$textref =~ s/\\/\\\\/g; # macro language
128 $$textref =~ s/_/\\_/g; # macro language
129 $$textref =~ s/</&lt;/g;
130 $$textref =~ s/>/&gt;/g;
131
132 # insert preformat tags and add text to document object
133 $$textref = "<pre>\n$$textref\n</pre>";
134}
135
136
137# replace_srcdoc_with_html.pl requires all subroutines that support src_replaceable
138# to contain a method called tmp_area_convert_file - this is indeed the case with all
139# Perl modules that are subclasses of ConvertToPlug.pm, but as we want TextPlugin to also
140# be srcreplaceable and because TextPlugin does not inherit from ConvertToPlug.pm, we have
141# a similar subroutine with the same name here.
142sub tmp_area_convert_file {
143 my $self = shift (@_);
144 my ($output_ext, $input_filename) = @_;
145
146 my $outhandle = $self->{'outhandle'};
147 #my $failhandle = $self->{'failhandle'};
148
149 # derive output filename from input filename
150 my ($tailname, $dirname, $suffix)
151 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
152
153 # Softlink to collection tmp dir
154 # First find a temporary directory to create the output file in
155 my $tmp_dirname = $dirname;
156 if(defined $ENV{'GSDLCOLLECTDIR'}) {
157 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
158 } elsif(defined $ENV{'GSDLHOME'}) {
159 $tmp_dirname = $ENV{'GSDLHOME'};
160 }
161 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
162 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
163
164 # convert to utf-8 otherwise we have problems with the doc.xml file
165 # later on
166 &unicode::ensure_utf8(\$tailname); # TODO: does this change the filename or not?
167 # need to test this out on a windows computer using a Greek filename
168 $suffix = lc($suffix);
169 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
170
171 # Make sure we have the absolute path to the input file
172 # (If gsdl is remote, we're given relative path to input file, of the form import/tailname.suffix
173 # But we can't softlink to relative paths. Therefore, we need to ensure that
174 # the input_filename is the absolute path.
175 my $ensure_path_absolute = 1; # true
176
177 # Now make the softlink, so we don't accidentally damage the input file
178 # softlinking creates a symbolic link to (or, if that's not possible, it makes a copy of) the original
179 &util::soft_link($input_filename, $tmp_filename, $ensure_path_absolute);
180
181 my $verbosity = $self->{'verbosity'};
182 if ($verbosity > 0) {
183 # need this output statement, as GShell.java's runRemote() sets status to CANCELLED
184 # if there is no output! (Therefore, it only had this adverse affect when running GSDL remotely)
185 print $outhandle "Converting $tailname$suffix to html\n";
186 }
187
188 #my $output_filename = $tailname$output_ext; #output_ext has to be html!
189 my $output_filename = &util::filename_cat($tmp_dirname, $tailname.".html");
190
191 # Read contents of text file line by line into an array
192 # create an HTML file from the text file
193 # Recreate the original file for writing the updated contents
194 unless(open(TEXT, "<$tmp_filename")) { # open it as a new file for writing
195 print STDERR "TextPlugin.pm: Unable to open and read from $tmp_filename for converting to html...ERROR: $!\n";
196 return ""; # no file name
197 }
198
199 # Read the entire file at once
200 my $text;
201 {
202 local $/ = undef; # Now can read the entire file at once. From http://perl.plover.com/local.html
203 $text = <TEXT>; # File is read in as one single 'line'
204 }
205 close(TEXT); # close the file
206
207 # convert the text
208 $self->text_to_html(\$text);
209
210 #print STDERR "****output_filename: $output_filename\n";
211 #print STDERR "****text: $text\n";
212
213 # try creating this new file writing and try opening it for writing, else exit with error value
214 unless(open(HTML, ">$output_filename")) { # open the new html file for writing
215 print STDERR "TextPlugin.pm: Unable to create $output_filename for writing $tailname$suffix txt converted to html...ERROR: $!\n";
216 return ""; # no filename
217 }
218 # write the html contents in text out to the file
219 print HTML $text;
220 close HTML;
221
222 # remove the copy of the original file/remove the symbolic link to original file
223 &util::rm($tmp_filename);
224
225 return $output_filename; # return the output file path
226}
227
228
2291;
Note: See TracBrowser for help on using the repository browser.