source: trunk/gsdl/perllib/plugins/PagedImgPlug.pm@ 7243

Last change on this file since 7243 was 7106, checked in by kjdon, 20 years ago

modified the comments at the top, made the size args have ranges > 0

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.8 KB
Line 
1###########################################################################
2#
3# PagedImgPlug.pm -- plugin for sets of images and OCR text that
4# make up a document
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27# PagedImgPlug
28# processes sequences of images, with optional OCR text
29#
30# This plugin takes .item files, which contain metadata and lists of image
31# files, and produces a document containing sections, one for each page.
32#
33# The format of the .item file is as follows:
34# The first lines contain any metadata for the whole document
35# <metadata-name>metadata-value
36# eg.
37# <Title>Snail farming
38# <Date>19230102
39# Then comes a list of pages, one page per line, each line has the format
40# pagenum:imagefile:textfile:r
41# page num and imagefile are required. pagenum is used for the Title
42# of the section, and in the display is shown as page <pagenum>.
43# imagefile is the image for the page. textfile is an optional text
44# file containing the OCR (or any) text for the page - this gets added
45# as the text for the section. r is optional, and signals that the image
46# should be rotated 180deg. Eg use this if the image has been made upside down.
47# So an example item file looks like:
48# <Title>Snail farming
49# <Date>19960403
50# 1:p1.gif:p1.txt:
51# 2:p2.gif::
52# 3:p3.gif:p3.txt:
53# 3b:p3b.gif:p3b.txt:r
54# The second page has no text, the fourth page is a back page, and
55# should be rotated.
56#
57# All the supplementary image and text files should be in the same folder as
58# the .item file.
59#
60# To display the images instead of the document text, you can use [srcicon]
61# in the DocumentText format statement.
62# For example,
63#
64# format DocumentText "<center><table width=_pagewidth_><tr><td>[srcicon]</td></tr></table></center>"
65
66# If you have used -screenview, you can also use [screenicon] in the format
67# statement to display the smaller image. Here is an example that switches
68# between the two:
69#
70# format DocumentText "<center><table width=_pagewidth_><tr><td>_If_('_cgiargp_' eq 'full',<a href='_httpdocument_&d=_cgiargd_&p=small'>Switch to small version.</a>,<a href='_httpdocument_&d=_cgiargd_&p=full'>Switch to fullsize version</a>)</td></tr><tr><td>_If_('_cgiargp_' eq 'full',<a href='_httpdocument_&d=_cgiargd_&p=small' title='Switch to small version'>[srcicon]</a>,<a href='_httpdocument_&d=_cgiargd_&p=full' title='Switch to fullsize version'>[screenicon]</a>)</td></tr></table></center>"
71#
72# Additional metadata can be added into the .item files, alternatively you can
73# use normal metadata.xml files, with the name of the .item file as the
74# FileName.
75
76package PagedImgPlug;
77
78use BasPlug;
79
# Set up inheritance from BasPlug.  This is a BEGIN block, so it runs at
# compile time.
BEGIN {
    @ISA = qw(BasPlug);
}
83
# The values accepted by the -documenttype option.  Each 'desc' is a key in
# braces rather than literal text (presumably resolved through the resource
# bundle -- confirm against BasPlug).
my $type_list = [
    {
        'name' => "paged",
        'desc' => "{PagedImgPlug.documenttype.paged}",
    },
    {
        'name' => "hierarchy",
        'desc' => "{PagedImgPlug.documenttype.hierarchy}",
    },
];
89
# Description of every option this plugin accepts: name, description key,
# type, default value ('deft'), permitted range and whether it is required.
# The defaults for process_exp and block_exp come from the
# get_default_*_exp subs defined further down in this package.
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "{BasPlug.process_exp}",
        'type' => "string",
        'deft' => &get_default_process_exp(),
        'reqd' => "no",
    },
    {
        'name' => "block_exp",
        'desc' => "{BasPlug.block_exp}",
        'type' => "string",
        'deft' => &get_default_block_exp(),
        'reqd' => "no",
    },
    {
        'name' => "noscaleup",
        'desc' => "{ImagePlug.noscaleup}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name' => "thumbnail",
        'desc' => "{PagedImgPlug.thumbnail}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name'  => "thumbnailsize",
        'desc'  => "{ImagePlug.thumbnailsize}",
        'type'  => "int",
        'deft'  => "100",
        'range' => "1,",
        'reqd'  => "no",
    },
    {
        'name' => "thumbnailtype",
        'desc' => "{ImagePlug.thumbnailtype}",
        'type' => "string",
        'deft' => "gif",
        'reqd' => "no",
    },
    {
        'name' => "screenview",
        'desc' => "{PagedImgPlug.screenview}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name'  => "screenviewsize",
        'desc'  => "{PagedImgPlug.screenviewsize}",
        'type'  => "int",
        'deft'  => "500",
        'range' => "1,",
        'reqd'  => "no",
    },
    {
        'name' => "screenviewtype",
        'desc' => "{PagedImgPlug.screenviewtype}",
        'type' => "string",
        'deft' => "jpg",
        'reqd' => "no",
    },
    {
        'name' => "converttotype",
        'desc' => "{ImagePlug.converttotype}",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
    },
    {
        'name'  => "minimumsize",
        'desc'  => "{ImagePlug.minimumsize}",
        'type'  => "int",
        'deft'  => "100",
        'range' => "1,",
        'reqd'  => "no",
    },
    {
        'name' => "noheaderpage",
        'desc' => "{PagedImgPlug.noheaderpage}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name' => "documenttype",
        'desc' => "{PagedImgPlug.documenttype}",
        'type' => "enum",
        'list' => $type_list,
        'deft' => "paged",
        'reqd' => "no",
    },
];
156
157
# The option record for this plugin; new() appends it to the object's
# 'option_list'.
my $options = {
    'name'     => "PagedImgPlug",
    'desc'     => "{PagedImgPlug.desc}",
    'inherits' => "yes",
    'args'     => $arguments,
};
162
163
# Constructor.
#
# Arguments: the class name followed by the plugin options.  The options are
# passed to the BasPlug constructor first and are then parsed here for the
# PagedImgPlug-specific settings; options not listed here are tolerated
# ("allow_extra_options").  Note that the -documenttype value is stored
# under the 'doctype' key of the object.
#
# Returns the plugin object blessed into $class.  If an option value does
# not match its pattern, prints a usage message to STDERR and dies.
sub new {
    my $class = shift (@_);
    my $self = BasPlug->new ("PagedImgPlug", @_);

    # register this plugin's option description on the object
    my $option_list = $self->{'option_list'};
    push (@{$option_list}, $options);

    # The default for screenviewsize is 500 here so that it agrees with the
    # default declared in $arguments (process_image falls back to 500 for a
    # zero/empty value in any case).
    if (!parsargv::parse(\@_,
                         q^noscaleup^, \$self->{'noscaleup'},
                         q^converttotype/.*/^, \$self->{'converttotype'},
                         q^minimumsize/[0-9]*/100^, \$self->{'minimumsize'},

                         q^thumbnailsize/[0-9]*/100^, \$self->{'thumbnailsize'},
                         q^thumbnailtype/.*/gif^, \$self->{'thumbnailtype'},
                         q^screenviewsize/[0-9]*/500^, \$self->{'screenviewsize'},
                         q^screenviewtype/.*/jpg^, \$self->{'screenviewtype'},
                         q^thumbnail^, \$self->{'thumbnail'},
                         q^screenview^, \$self->{'screenview'},
                         q^noheaderpage^, \$self->{'noheaderpage'},
                         'documenttype/^(paged|hierarchy)$/paged', \$self->{'doctype'},
                         "allow_extra_options")) {

        print STDERR "\nPagedImgPlug uses an incorrect option.\n";
        print STDERR "Check your collect.cfg configuration file.\n";
        $self->print_txt_usage("");  # Use default resource bundle
        die "\n";
    }

    return bless $self, $class;
}
195
# Default regular expression (as a string) selecting the files this plugin
# processes: any name ending in ".item".
sub get_default_process_exp {
    my ($self) = @_;

    return '\.item$';
}
201
# Ideally everything except the .item files would be blocked; instead the
# default blocks image files, text files and names ending in "~"
# (case-insensitively).
sub get_default_block_exp {
    my ($self) = @_;

    return '(?i)(\.jpe?g|\.gif|\.png|\.tif?f|\.te?xt|~)$';
}
# Build the name of a temporary file ending in ".$extension" that does not
# exist yet: start from util::get_tmp_filename() and, while the candidate
# name is taken, insert an increasing counter before the extension.
sub _unique_tmp_filename {
    my ($extension) = @_;

    my $filehead = &util::get_tmp_filename();
    my $candidate = "$filehead.$extension";
    my $n = 1;
    while (-e $candidate) {
        $candidate = "$filehead$n.$extension";
        $n++;
    }
    return $candidate;
}

# Process one page image: optionally convert and/or rotate it, record its
# metadata, associate it with the document, and create the thumbnail and
# screenview images if those options are set.  The image's type, width,
# height and size are discovered with the identify() sub; conversions are
# done with the "convert" utility.
#
# Arguments:
#   $filename - image file name with full path
#   $srcfile  - image file name without path
#   $doc_obj  - the document object being built
#   $section  - the section the image belongs to
#   $rotate   - "r" if the image is to be rotated 180 degrees
#
# Returns the image type (the -converttotype value if a conversion was
# requested, otherwise the type reported by identify(), possibly "unknown"),
# or 0 if the image was skipped (empty file name or file smaller than
# -minimumsize).
#
# NOTE(review): the names of the temporary files are kept in the
# 'tmp_filename1' .. 'tmp_filename3' slots of $self, which read() removes
# once per .item file.  Since this sub runs once per page, the slots only
# remember the files of the most recent page -- confirm whether the files
# of earlier pages are cleaned up elsewhere.
sub process_image {
    my $self = shift (@_);
    my ($filename, $srcfile, $doc_obj, $section, $rotate) = @_;

    my $verbosity = $self->{'verbosity'};
    my $outhandle = $self->{'outhandle'};

    # check the filename is okay
    return 0 if (!defined $srcfile || !defined $filename
                 || $srcfile eq "" || $filename eq "");

    # really skip images that are too small (a missing file counts as size 0)
    my $minimumsize = $self->{'minimumsize'};
    if (defined $minimumsize && ((-s $filename) || 0) < $minimumsize) {
        print $outhandle "PagedImgPlug: \"$filename\" too small, skipping\n"
            if ($verbosity > 1);
        return 0;
    }

    my $top = ($section eq $doc_obj->get_top_section()) ? 1 : 0;

    # Convert the image to a new type (if required), and rotate if required.
    my $converttotype = $self->{'converttotype'};
    $converttotype = "" unless defined $converttotype;
    my $want_rotate = (defined $rotate && $rotate eq "r") ? 1 : 0;
    my $originalfilename = "";  # only set if we convert or rotate
    my $type = "unknown";
    my $converted = 0;
    if ($converttotype ne "" && $filename !~ /$converttotype$/) {
        $converted = 1;
        $originalfilename = $filename;
        $filename = _unique_tmp_filename($converttotype);
        $self->{'tmp_filename1'} = $filename;

        my $rotate_option = $want_rotate ? "-rotate 180 " : "";

        my $command = "convert -verbose \"$originalfilename\" $rotate_option \"$filename\"";
        print $outhandle "CONVERT: $command\n" if ($verbosity > 2);
        my $result = `$command`;
        print $outhandle "CONVERT RESULT = $result\n" if ($verbosity > 2);

        $type = $converttotype;
    } elsif ($want_rotate) {
        $originalfilename = $filename;
        $filename = &util::get_tmp_filename();
        # remember the rotated copy so that read() can remove it afterwards
        $self->{'tmp_filename1'} = $filename;

        my $command = "convert \"$originalfilename\" -rotate 180 \"$filename\"";
        print $outhandle "ROTATE: $command\n" if ($verbosity > 2);
        my $result = `$command`;
        print $outhandle "ROTATE RESULT = $result\n" if ($verbosity > 2);
    }

    # Add the image metadata
    my ($id) = $srcfile =~ /^([^\.]*)/;  # the file name up to the first dot
    # the name the image is stored under: with the new extension if we
    # converted it, otherwise unchanged
    my $file = $converted ? "$id.$converttotype" : $srcfile;

    my $url = $file;        # the new file name prepared for a url
    my $srcurl = $srcfile;
    $url =~ s/ /%20/g;
    $srcurl =~ s/ /%20/g;

    $doc_obj->add_metadata ($section, "Image", $url);

    # Also want to set filename as 'Source' metadata to be
    # consistent with other plugins
    $doc_obj->add_metadata ($section, "Source", $srcurl);

    my ($image_type, $image_width, $image_height, $image_size)
        = &identify($filename, $outhandle, $verbosity);

    $doc_obj->add_metadata ($section, "ImageType", $image_type);
    $doc_obj->add_metadata ($section, "ImageWidth", $image_width);
    $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
    $doc_obj->add_metadata ($section, "ImageSize", $image_size);

    if ($type eq "unknown" && $image_type) {
        $type = $image_type;
    }

    # The top section refers to its own assocfilepath, any other section to
    # that of its parent.
    my $assocpath = $top ? "[assocfilepath]" : "[parent:assocfilepath]";

    $doc_obj->add_metadata ($section, "srclink",
                            "<a href=_httpcollection_/index/assoc/$assocpath/[Image]>");
    $doc_obj->add_metadata ($section, "srcicon",
                            "<img src=_httpcollection_/index/assoc/$assocpath/[Image]>");
    $doc_obj->add_metadata ($section, "/srclink", "</a>");

    # Add the image as an associated file
    $doc_obj->associate_file($filename, $file, "image/$type", $section);
    print $outhandle "associating file $filename as name $file\n" if ($verbosity > 2);

    if ($self->{'thumbnail'}) {
        # Make the thumbnail image
        my $thumbnailsize = $self->{'thumbnailsize'} || 100;
        my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';

        my $thumbnailfile = _unique_tmp_filename($thumbnailtype);
        $self->{'tmp_filename2'} = $thumbnailfile;

        # Generate the thumbnail with convert
        my $command = "convert -verbose -geometry ${thumbnailsize}x${thumbnailsize}"
            . " \"$filename\" \"$thumbnailfile\"";
        print $outhandle "THUMBNAIL: $command\n" if ($verbosity > 2);
        my $result = `$command 2>&1`;
        print $outhandle "THUMB RESULT: $result\n" if ($verbosity > 2);

        # Add the thumbnail as an associated file ...
        if (-e $thumbnailfile) {
            my $thumbname = $id . "thumb.$thumbnailtype";
            $doc_obj->associate_file($thumbnailfile, $thumbname,
                                     "image/$thumbnailtype", $section);
            $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
            $doc_obj->add_metadata ($section, "Thumb", $thumbname);

            # as for srcicon, the top section must not refer to a parent
            $doc_obj->add_metadata ($section, "thumbicon", "<img src=_httpcollection_/index/assoc/$assocpath/[Thumb] width=[ThumbWidth] height=[ThumbHeight]>");
        }

        # Extract Thumbnail metadata from convert output
        if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
            $doc_obj->add_metadata ($section, "ThumbWidth", $1);
            $doc_obj->add_metadata ($section, "ThumbHeight", $2);
        }
    }

    # Make a screen-sized version of the picture if requested
    if ($self->{'screenview'}) {

        # To do: if the actual image is smaller than the screenview size,
        # we should use the original !

        my $screenviewsize = $self->{'screenviewsize'} || 500;
        my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';

        my $screenviewfilename = _unique_tmp_filename($screenviewtype);
        $self->{'tmp_filename3'} = $screenviewfilename;

        # make the screenview image
        my $command = "convert -verbose -geometry ${screenviewsize}x${screenviewsize}"
            . " \"$filename\" \"$screenviewfilename\"";
        print $outhandle "SCREENVIEW: $command\n" if ($verbosity > 2);
        my $result = `$command 2>&1`;
        print $outhandle "SCREENVIEW RESULT: $result\n" if ($verbosity > 3);

        # get screenview dimensions from the convert output
        if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
            $doc_obj->add_metadata ($section, "ScreenWidth", $1);
            $doc_obj->add_metadata ($section, "ScreenHeight", $2);
        }

        # add the screenview as an associated file ...
        if (-e $screenviewfilename) {
            my $screenname = $id . "sv.$screenviewtype";
            $doc_obj->associate_file($screenviewfilename, $screenname,
                                     "image/$screenviewtype", $section);
            print $outhandle "associating screen file $screenviewfilename as name $screenname\n" if ($verbosity > 2);

            $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
            $doc_obj->add_metadata ($section, "Screen", $screenname);

            if ($top) {
                $doc_obj->add_metadata ($section, "screenicon", "<img src=_httpcollection_/index/assoc/[assocfilepath]/[Screen] width=[ScreenWidth] height=[ScreenHeight]>");
            } else {
                $doc_obj->add_metadata ($section, "screenicon", "<img src=_httpcollection_/index/assoc/{If}{[parent:assocfilepath],[parent:assocfilepath],[assocfilepath]}/[Screen] width=[ScreenWidth] height=[ScreenHeight]>");
            }
        } else {
            print $outhandle "PagedImgPlug: couldn't find \"$screenviewfilename\"\n";
        }
    }

    return $type;
}
424
425
426
# Discover the characteristics of an image file with the ImageMagick
# "identify" command.
#
# Arguments:
#   $image     - name of the image file
#   $outhandle - file handle for progress output
#   $verbosity - output is printed for verbosity levels above 2 and 3
#
# Returns the list (type, width, height, size); each element is the string
# "unknown" if it could not be read from the output of identify.
sub identify {
    my ($image, $outhandle, $verbosity) = @_;

    # Use the ImageMagick "identify" command to get the file specs.  The file
    # name is quoted, as in the convert commands used by this plugin, so that
    # names containing spaces reach identify as a single argument.
    my $command = "identify \"$image\" 2>&1";
    print $outhandle "$command\n" if ($verbosity > 2);
    my $result = `$command`;
    $result = '' unless defined $result;
    print $outhandle "$result\n" if ($verbosity > 3);

    # Read the type, width, and height
    my $type = 'unknown';
    my $width = 'unknown';
    my $height = 'unknown';

    my $image_safe = quotemeta $image;
    if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
        ($type, $width, $height) = ($1, $2, $3);
    }

    # Read the size: a number followed by "b" is taken as is, one followed
    # by "kb" is multiplied by 1024
    my $size = "unknown";
    if ($result =~ m/^.* ([0-9]+)b/) {
        $size = $1;
    } elsif ($result =~ m/^.* ([0-9]+)kb/) {
        $size = 1024 * $1;
    }

    print $outhandle "file: $image:\t $type, $width, $height, $size\n"
        if ($verbosity > 3);

    # Return the specs
    return ($type, $width, $height, $size);
}
466
467
# The PagedImgPlug read() function. This function does all the right things
# to make general options work for a given plugin. It calls the process()
# function which does all the work specific to a plugin (like the old
# read functions used to do). Most plugins should define their own
# process() function and let this read() function keep control.
#
# PagedImgPlug overrides read() because there is no need to read the actual
# text of the file in: the .item file is handed to process_item(), which
# builds the document object from it.
#
# Return number of files processed (0 if the file is blocked), undef if
# can't process.
# Note that $base_dir might be "" and that $file might
# include directories
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    my $outhandle = $self->{'outhandle'};

    my $filename = &util::filename_cat($base_dir, $file);
    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
        return undef;
    }

    print $outhandle "PagedImgPlug processing \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    # split the full name into the directory part and the bare file name
    my ($dir);
    ($dir, $file) = $filename =~ /^(.*?)([^\/\\]*)$/;

    # process the .item file
    my $doc_obj = $self->process_item($filename, $dir, $file, $processor);

    # include any metadata passed in from previous plugins
    # note that this metadata is associated with the top level section
    my $section = $doc_obj->get_top_section();
    $self->extra_metadata ($doc_obj, $section, $metadata);

    # do plugin specific processing of doc_obj; there is no document text to
    # hand over, so process() gets a reference to an empty string
    my $text = "";
    return undef unless defined ($self->process (\$text, $pluginfo, $base_dir,
                                                 $file, $metadata, $doc_obj));

    # do any automatic metadata extraction
    $self->auto_extract_metadata ($doc_obj);

    # process the document
    $processor->process($doc_obj);

    # clean up temporary files - we do this here instead of in
    # process_image because associated files aren't actually copied
    # until after process has been run.  Each slot is cleared as well, so
    # that a stale name is not looked at again for the next file.
    foreach my $slot ('tmp_filename1', 'tmp_filename2', 'tmp_filename3') {
        my $tmp_file = delete $self->{$slot};
        &util::rm($tmp_file) if (defined $tmp_file && -e $tmp_file);
    }

    $self->{'num_processed'}++;

    return 1;
}
536
# Build a document object from a .item file.
#
# Lines of the form <name>value set metadata on the top section.  Every
# other line containing a word character describes one page, as
# pagenum:imagefile:textfile:r (textfile and r are optional; r means the
# image is rotated by 180 degrees).  Each page gets its own section, except
# that with -noheaderpage the first page goes into the top section.
#
# Arguments:
#   $filename  - the .item file, with full path
#   $dir       - the directory part of $filename; image and text file names
#                are appended to it directly
#   $file      - the .item file name without path; with the trailing ".item"
#                removed it becomes the document's OID
#   $processor - not used here
#
# Returns the document object.  Dies if the .item file cannot be opened.
sub process_item {
    my $self = shift (@_);
    my ($filename, $dir, $file, $processor) = @_;
    my $outhandle = $self->{'outhandle'};

    my $doc_obj = doc->new ($file, "indexed_doc");
    my $topsection = $doc_obj->get_top_section();

    # set the gsdlthistype metadata - "Paged" ensures this document will
    # be treated as a Paged doc, even if Titles are not numeric
    my $thistype = ($self->{'doctype'} eq 'paged') ? "Paged" : "Hierarchy";
    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", $thistype);

    open (my $itemfile, '<', $filename) || die "couldn't open $filename: $!\n";

    my $num = 0;    # number of pages seen so far
    my $first = 1;  # true until the first page has been handled
    while (defined (my $line = <$itemfile>)) {
        next unless $line =~ /\w/;
        chomp $line;
        if ($line =~ /^<([^>]*)>(.*?)\s*$/) {
            # a metadata line for the whole document
            my ($meta_name, $meta_value) = ($1, $2);
            $doc_obj->set_utf8_metadata_element ($topsection, $meta_name, $meta_value);
        } else {
            $num++;
            # line should be like page:imagefilename:textfilename:r - the r
            # is optional -> means rotate the image 180 deg
            $line =~ s/^\s+//;  # remove space at the front
            $line =~ s/\s+$//;  # remove space at the end
            my ($pagenum, $imgname, $txtname, $rotate) = split /:/, $line;
            $imgname = "" unless defined $imgname;

            # create a new section for each image file
            # the first image may go in the top section or in the first
            # child section (if headerpage is true)
            my $cursection;
            if ($first && $self->{'noheaderpage'}) {
                $cursection = $topsection;
            } else {
                $cursection = $doc_obj->insert_section($doc_obj->get_end_child($topsection));
            }
            $first = 0;

            # the page number becomes the Title
            $doc_obj->set_utf8_metadata_element($cursection, 'Title', $pagenum);

            # process the image for this page; process_image signals failure
            # with a false (not an undefined) value
            my $result = $self->process_image($dir.$imgname, $imgname, $doc_obj, $cursection, $rotate);
            if (!$result) {
                print $outhandle "PagedImgPlug: couldn't process image \"$dir$imgname\" for item \"$filename\"\n";
            }

            # process the text file if one is there
            if (defined $txtname && $txtname ne "") {
                $result = $self->process_text ($dir.$txtname, $txtname, $doc_obj, $cursection);
                if (!$result) {
                    print $outhandle "PagedImgPlug: couldn't process text file \"$dir$txtname\" for item \"$filename\"\n";
                }
            } else {
                # otherwise add in some dummy text
                $doc_obj->add_text($cursection, "Dummy text to sidestep display bug");
            }
        }
    }

    close ($itemfile);

    # use the name of the .item file (without the extension) as the OID
    $file =~ s/\.item$//i;
    $doc_obj->set_OID ($file);
    # add numpages metadata
    $doc_obj->set_utf8_metadata_element ($topsection, 'NumPages', "$num");
    return $doc_obj;
}
615
# Read the text file for one page and add its contents, escaped and wrapped
# in <pre> tags, as the text of section $cursection of $doc_obj.
#
# Arguments:
#   $fullpath   - text file name with full path
#   $file       - text file name without path (not used here)
#   $doc_obj    - the document object being built
#   $cursection - the section that receives the text
#
# Returns 1 on success, 0 if no text could be read from the file.
sub process_text {
    my ($self, $fullpath, $file, $doc_obj, $cursection) = @_;

    # Do encoding stuff
    my ($language, $encoding) = $self->textcat_get_language_encoding ($fullpath);

    my $text = "";
    &BasPlug::read_file($self, $fullpath, $encoding, $language, \$text);
    unless (length ($text)) {
        print "PagedImgPlug: ERROR: $fullpath contains no text\n" if $self->{'verbosity'};
        return 0;
    }

    for ($text) {
        # we need to escape the escape character, or else mg will convert
        # into eg literal newlines, instead of leaving the text as '\n'
        s/\\/\\\\/g;  # macro language
        s/_/\\_/g;    # macro language
        # keep angle brackets from being taken as markup
        s/</&lt;/g;
        s/>/&gt;/g;
    }

    # insert preformat tags and add text to document object
    $doc_obj->add_utf8_text($cursection, "<pre>\n$text\n</pre>");

    return 1;
}
643
# Plugin specific processing of doc_obj.  The document is built entirely by
# process_item(), so nothing is left to do here: always returns 1.
sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    return 1;
}
652
6531;
654
655
656
657
658
659
660
661
662
663
664
Note: See TracBrowser for help on using the repository browser.