source: gsdl/trunk/perllib/plugins/TextPlugin.pm@ 16104

Last change on this file since 16104 was 16104, checked in by kjdon, 16 years ago

tried to make the 'xxxplugin processing file' print statements more consistent. They are now done in read (or read_into_doc_obj) and not process

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.0 KB
Line 
1###########################################################################
2#
3# TextPlugin.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document. Adds Title metadata
27# of first line of text (up to 100 characters long).
28
29# 12/05/02 Added usage datastructure - John Thompson
30
31package TextPlugin;
32
33use ReadTextFile;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37no strict 'subs';
38
39sub BEGIN {
40 @TextPlugin::ISA = ('ReadTextFile');
41}
42
43my $arguments =
44 [ { 'name' => "process_exp",
45 'desc' => "{BasePlugin.process_exp}",
46 'type' => "regexp",
47 'deft' => &get_default_process_exp(),
48 'reqd' => "no" } ,
49 { 'name' => "title_sub",
50 'desc' => "{TextPlugin.title_sub}",
51 'type' => "regexp",
52 'deft' => "",
53 'reqd' => "no" } ];
54
55my $options = { 'name' => "TextPlugin",
56 'desc' => "{TextPlugin.desc}",
57 'abstract' => "no",
58 'inherits' => "yes",
59 'srcreplaceable' => "yes", # Source docs in regular txt format can be replaced with GS-generated html
60 'args' => $arguments };
61
62
63sub new {
64 my ($class) = shift (@_);
65 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
66 push(@$pluginlist, $class);
67
68 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
69 push(@{$hashArgOptLists->{"OptList"}},$options);
70
71 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
72
73 return bless $self, $class;
74}
75
76sub get_default_process_exp {
77 my $self = shift (@_);
78
79 return q^(?i)\.te?xt$^;
80}
81
82# do plugin specific processing of doc_obj
83sub process {
84 my $self = shift (@_);
85 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
86 my $outhandle = $self->{'outhandle'};
87
88 my $cursection = $doc_obj->get_top_section();
89 # get title metadata
90 # (don't need to get title if it has been passed
91 # in from another plugin)
92 if (!defined $metadata->{'Title'}) {
93 my ($title) = $$textref;
94 $title =~ /^\s+/s;
95 if (defined $self->{'title_sub'} &&
96 $self->{'title_sub'}) {$title =~ s/$self->{'title_sub'}//;}
97 $title =~ /^\s*([^\n]*)/s; $title=$1;
98 if (length($title) > 100) {
99 $title = substr ($title, 0, 100) . "...";
100 }
101 $title =~ s/\[/[/g;
102 $title =~ s/\[/]/g;
103 $title =~ s/\</&#60;/g;
104 $title =~ s/\>/&#62;/g;
105 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
106 }
107 # Add FileFormat metadata
108 $doc_obj->add_metadata($cursection, "FileFormat", "Text");
109
110 # insert preformat tags and add text to document object
111 $self->text_to_html($textref); # modifies the text
112 $doc_obj->add_utf8_text($cursection, $$textref); #$doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
113
114 return 1;
115}
116
117sub text_to_html {
118 my $self = shift (@_);
119 my ($textref) = @_;
120
121 # we need to escape the escape character, or else mg will convert into
122 # eg literal newlines, instead of leaving the text as '\n'
123 $$textref =~ s/\\/\\\\/g; # macro language
124 $$textref =~ s/_/\\_/g; # macro language
125 $$textref =~ s/</&lt;/g;
126 $$textref =~ s/>/&gt;/g;
127
128 # insert preformat tags and add text to document object
129 $$textref = "<pre>\n$$textref\n</pre>";
130}
131
132
133# replace_srcdoc_with_html.pl requires all subroutines that support src_replaceable
134# to contain a method called tmp_area_convert_file - this is indeed the case with all
135# Perl modules that are subclasses of ConvertToPlug.pm, but as we want TextPlugin to also
136# be srcreplaceable and because TextPlugin does not inherit from ConvertToPlug.pm, we have
137# a similar subroutine with the same name here.
138sub tmp_area_convert_file {
139 my $self = shift (@_);
140 my ($output_ext, $input_filename) = @_;
141
142 my $outhandle = $self->{'outhandle'};
143 #my $failhandle = $self->{'failhandle'};
144
145 # derive output filename from input filename
146 my ($tailname, $dirname, $suffix)
147 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
148
149 # Softlink to collection tmp dir
150 # First find a temporary directory to create the output file in
151 my $tmp_dirname = $dirname;
152 if(defined $ENV{'GSDLCOLLECTDIR'}) {
153 $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
154 } elsif(defined $ENV{'GSDLHOME'}) {
155 $tmp_dirname = $ENV{'GSDLHOME'};
156 }
157 $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
158 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
159
160 # convert to utf-8 otherwise we have problems with the doc.xml file
161 # later on
162 &unicode::ensure_utf8(\$tailname); # TODO: does this change the filename or not?
163 # need to test this out on a windows computer using a Greek filename
164 $suffix = lc($suffix);
165 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
166
167 # Make sure we have the absolute path to the input file
168 # (If gsdl is remote, we're given relative path to input file, of the form import/tailname.suffix
169 # But we can't softlink to relative paths. Therefore, we need to ensure that
170 # the input_filename is the absolute path.
171 my $ensure_path_absolute = 1; # true
172
173 # Now make the softlink, so we don't accidentally damage the input file
174 # softlinking creates a symbolic link to (or, if that's not possible, it makes a copy of) the original
175 &util::soft_link($input_filename, $tmp_filename, $ensure_path_absolute);
176
177 my $verbosity = $self->{'verbosity'};
178 if ($verbosity > 0) {
179 # need this output statement, as GShell.java's runRemote() sets status to CANCELLED
180 # if there is no output! (Therefore, it only had this adverse affect when running GSDL remotely)
181 print $outhandle "Converting $tailname$suffix to html\n";
182 }
183
184 #my $output_filename = $tailname$output_ext; #output_ext has to be html!
185 my $output_filename = &util::filename_cat($tmp_dirname, $tailname.".html");
186
187 # Read contents of text file line by line into an array
188 # create an HTML file from the text file
189 # Recreate the original file for writing the updated contents
190 unless(open(TEXT, "<$tmp_filename")) { # open it as a new file for writing
191 print STDERR "TextPlugin.pm: Unable to open and read from $tmp_filename for converting to html...ERROR: $!\n";
192 return ""; # no file name
193 }
194
195 # Read the entire file at once
196 my $text;
197 {
198 local $/ = undef; # Now can read the entire file at once. From http://perl.plover.com/local.html
199 $text = <TEXT>; # File is read in as one single 'line'
200 }
201 close(TEXT); # close the file
202
203 # convert the text
204 $self->text_to_html(\$text);
205
206 #print STDERR "****output_filename: $output_filename\n";
207 #print STDERR "****text: $text\n";
208
209 # try creating this new file writing and try opening it for writing, else exit with error value
210 unless(open(HTML, ">$output_filename")) { # open the new html file for writing
211 print STDERR "TextPlugin.pm: Unable to create $output_filename for writing $tailname$suffix txt converted to html...ERROR: $!\n";
212 return ""; # no filename
213 }
214 # write the html contents in text out to the file
215 print HTML $text;
216 close HTML;
217
218 # remove the copy of the original file/remove the symbolic link to original file
219 &util::rm($tmp_filename);
220
221 return $output_filename; # return the output file path
222}
223
224
2251;
Note: See TracBrowser for help on using the repository browser.