source: trunk/gsdl/perllib/plugins/BasPlug.pm@ 1857

Last change on this file since 1857 was 1857, checked in by dmm9, 23 years ago

date extraction options documented

  • Property svn:keywords set to Author Date Id Revision
File size: 23.4 KB
Line 
1###########################################################################
2#
3# BasPlug.pm -- base class for all the import plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package BasPlug;
27
28use parsargv;
29use multiread;
30use cnseg;
31use acronym;
32use textcat;
33use doc;
34use diagnostics;
35use DateExtract;
36use iso639;
37
# Encodings that may be used for a document. When textcat reports an
# encoding that is not a key of this hash, a warning is printed and the
# default encoding is used instead. Only the keys are significant; every
# value is the empty string.
%supported_encodings = map { $_ => "" } qw(
    ascii
    iso_8859_1 windows_1252
    iso_8859_2 windows_1250
    iso_8859_3
    iso_8859_4
    iso_8859_5 windows_1251 koi8_r koi8_u
    iso_8859_6 windows_1256
    iso_8859_7 windows_1253
    iso_8859_8 windows_1255
    iso_8859_9 windows_1254
    gb
);
62
# Prints the usage text for the general options (those shared by every
# plugin) to STDERR.
#
#   $plugin_name -- name shown on the "usage:" line
#
# The option descriptions live in a non-interpolating here-document so
# that the example regular expression is printed exactly as written.
# (Inside a double-quoted string "\." collapses to ".", which made the
# documented default process_exp wrong.)
sub print_general_usage {
    my ($plugin_name) = @_;

    print STDERR "\n usage: plugin $plugin_name [options]\n\n";

    print STDERR <<'END_OF_GENERAL_USAGE';
 -process_exp A perl regular expression to match against filenames.
 Matching filenames will be processed by this plugin.
 Each plugin has its own default process_exp. e.g. HTMLPlug
 defaults to '(?i)\.html?$' i.e. all documents ending in
 .htm or .html (case-insensitive).

 -block_exp Files matching this regular expression will be blocked from
 being passed to any further plugins in the list. This has no
 real effect other than to prevent lots of warning messages
 about input files you don't care about. Each plugin may or may
 not have a default block_exp. e.g. by default HTMLPlug blocks
 any files with .gif, .jpg, .jpeg, .png, .rtf or .css
 file extensions.

 -input_encoding The encoding of the source documents. Documents will be
 converted from these encodings and stored internally as
 utf8. The default input_encoding is 'auto'. Accepted values
 are:
 auto: Use text categorization algorithm to automatically
 identify the encoding of each source document. This
 will be slower than explicitly setting the encoding
 but will work where more than one encoding is used
 within the same collection.
 ascii: Plain 7 bit ascii. This may be a little faster than
 using iso_8859_1. Beware of using 'ascii' on a collection
 of documents that may contain characters outside of plain
 7 bit ascii though (e.g. German or French documents
 containing accents), use iso_8859_1 instead.
 utf8: either utf8 or unicode -- automatically detected
 unicode: just unicode
 iso_8859_1: Latin1 (western european languages)
 windows_1252: Windows codepage 1252 (WinLatin1)
 iso_8859_2: Latin2 (central and eastern european languages)
 windows_1250: Windows codepage 1250 (WinLatin2)
 iso_8859_3: Latin3
 iso_8859_4: Latin4
 iso_8859_5: Cyrillic
 windows_1251: Windows codepage 1251 (WinCyrillic)
 koi8_r: Cyrillic - Russian
 koi8_u: Cyrillic - Ukrainian
 iso_8859_6: Arabic
 windows_1256: Windows codepage 1256 (WinArabic)
 iso_8859_7: Greek
 windows_1253: Windows codepage 1253 (WinGreek)
 iso_8859_8: Hebrew
 windows_1255: Windows codepage 1255 (WinHebrew)
 iso_8859_9: Latin5
 windows_1254: Windows codepage 1254 (WinTurkish)
 gb: GB or GBK simplified Chinese

 -default_encoding If -input_encoding is set to 'auto' and the text categorization
 algorithm fails to extract the encoding or extracts an encoding
 that is not supported by Greenstone, this encoding will be used
 instead. The default is iso_8859_1

 -extract_language Identify the language of each document and set 'Language' metadata. Note
 that this will be done automatically if -input_encoding is 'auto'.
 -default_language If Greenstone fails to work out what language a document is the
 'Language' metadata element will be set to this value. The default
 is 'en' (ISO 639 language symbols should be used - en = English).
 Note that if -input_encoding is not set to 'auto' and -extract_language
 is not set, all documents will have their 'Language' metadata set to
 this value.

 -extract_acronyms Extract acronyms from within text and set as metadata

 -markup_acronyms Add acronym metadata into document text

 -first Comma separated list of first sizes to extract from the text
 into a metadata field. The fields are called 'FirstNNN'.

 -extract_email Extract email addresses as metadata

 -extract_date Extract dates pertaining to the content of documents about history

 -maximum_date The maximum historical date to be used as metadata (in a Common Era date such as 1950)

 -maximum_century The maximum named century to be extracted as historical metadata (e.g. 14 will extract all references up to the 14th century)

 -no_bibliography Do not try and block bibliographic dates when extracting historical dates.

END_OF_GENERAL_USAGE
}
160
# Prints the plugin specific usage text to STDERR. The base class has no
# options of its own; sub-classes that do should override this.
sub print_usage {
    my $notice = "\nThis plugin has no plugin specific options\n\n";
    print STDERR $notice;
}
167
# Constructor for the plugin base class.
#
#   $class       -- class to bless the new object into
#   $plugin_name -- name used in error and usage messages
#   remaining arguments are the plugin options, handed to parsargv::parse
#
# Parses the general options (those available to all plugins) into the
# object hash. If parsargv::parse reports failure, an explanation and the
# general usage text are printed to STDERR and the constructor dies.
sub new {
    my $class = shift (@_);
    my $plugin_name = shift (@_);
    my $self = {};

    # Anchored alternations of the accepted encoding names. Joining the
    # names (instead of prefixing each with "|") avoids a leading empty
    # alternative, which used to let an empty string pass validation.
    my $known = join ("|", sort keys %supported_encodings);
    my $denc = "^(" . $known . "|utf8|unicode)\$";
    my $enc = "^(" . $known . "|utf8|unicode|auto)\$";

    # a glob reference works with "print $outhandle ..." under strict too
    $self->{'outhandle'} = \*STDERR;

    # the current year is the default for -maximum_date
    my $year = (localtime)[5]+1900;

    # general options available to all plugins
    if (!parsargv::parse(\@_,
                         q^process_exp/.*/^, \$self->{'process_exp'},
                         q^block_exp/.*/^, \$self->{'block_exp'},
                         qq^input_encoding/$enc/auto^, \$self->{'input_encoding'},
                         qq^default_encoding/$denc/iso_8859_1^, \$self->{'default_encoding'},
                         q^extract_acronyms^, \$self->{'extract_acronyms'},
                         q^extract_email^, \$self->{'extract_email'},
                         q^markup_acronyms^, \$self->{'markup_acronyms'},
                         q^extract_language^, \$self->{'extract_language'},
                         q^default_language/.{2}/en^, \$self->{'default_language'},
                         q^first/.*/^, \$self->{'first'},
                         q^extract_date^, \$self->{'date_extract'},
                         qq^maximum_date/\\d{4}/$year^, \$self->{'max_year'},
                         q^no_bibliography^, \$self->{'no_biblio'},
                         qq^maximum_century/-?\\d{1,2}( ?B\\.C\\.E\\.)?/-1^, \$self->{'max_century'},
                         "allow_extra_options")) {

        print STDERR "\nThe $plugin_name plugin uses an incorrect general option (general options are those\n";
        print STDERR "available to all plugins). Check your collect.cfg configuration file.\n";
        print_general_usage($plugin_name);
        die "\n";
    }

    return bless $self, $class;
}
207
# Initialise the BasPlug options.
# If init() is overridden in a sub-class, remember to call BasPlug::init()
#
#   $verbosity -- passed through from the processor and stored on the object
#   $outhandle -- replaces the stored output handle, but only when defined
#
# process_exp and block_exp fall back to the plugin's defaults when they
# were not explicitly set (undefined or empty).
sub init {
    my ($self, $verbosity, $outhandle) = @_;

    $self->{'verbosity'} = $verbosity;

    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }

    # recursive plugins are not given a default process_exp
    unless ($self->is_recursive()) {
        my $current = $self->{'process_exp'};
        if (!defined $current || $current eq "") {
            $self->{'process_exp'} = $self->get_default_process_exp ();
            if ($self->{'process_exp'} eq "") {
                warn ref($self) . " Warning: Non-recursive plugin has no process_exp\n";
            }
        }
    }

    my $blocker = $self->{'block_exp'};
    if (!defined $blocker || $blocker eq "") {
        $self->{'block_exp'} = $self->get_default_block_exp ();
    }
}
236
# Called with ($pluginfo, $base_dir, $processor, $maxdocs); none of these
# are used here. Sets up the metadata extractors.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    return $self->initialise_extractors();
}
242
# Shuts down the metadata extractors.
sub end {
    my $self = shift (@_);

    return $self->finalise_extractors();
}
247
# Reports whether the plugin is recursive. The base class answers 0;
# recursive plugins should override this to return 1.
sub is_recursive {
    my ($self) = @_;

    return 0;
}
255
# Default block_exp used by init() when none was set explicitly.
# The base class returns the empty string.
sub get_default_block_exp {
    my ($self) = @_;

    return "";
}
261
# Default process_exp used by init() when none was set explicitly.
# The base class returns the empty string.
sub get_default_process_exp {
    my ($self) = @_;

    return "";
}
267
# The BasPlug read() function. This function does all the right things
# to make general options work for a given plugin. It calls the process()
# function which does all the work specific to a plugin. Most plugins
# should define their own process() function and let this read()
# function keep control.
#
# Recursive plugins (e.g. RecPlug) and specialized plugins like those
# capable of processing many documents within a single file (e.g.
# GMLPlug) should normally implement their own version of read().
#
# Returns:
#   1     -- the file was processed
#   0     -- the file matched block_exp, or contained no text
#   undef -- the file does not match process_exp, is not a plain file,
#            or was rejected by process()
#
# Note that $base_dir might be "" and that $file might include
# directories.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;

    if ($self->is_recursive()) {
        die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
    }

    my $outhandle = $self->{'outhandle'};

    my $filename = &util::filename_cat($base_dir, $file);
    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
        return undef;
    }
    my $plugin_name = ref ($self);
    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up

    my ($language, $encoding);
    if ($self->{'input_encoding'} eq "auto") {
        # use textcat to automatically work out the input encoding and language
        ($language, $encoding) = $self->get_language_encoding ($filename);

    } elsif ($self->{'extract_language'}) {
        # use textcat to get language metadata; the encoding it reports is
        # only compared against the configured one. It is a lexical here:
        # it used to be an undeclared package global.
        my $extracted_encoding;
        ($language, $extracted_encoding) = $self->get_language_encoding ($filename);
        $encoding = $self->{'input_encoding'};

        if ($extracted_encoding ne $encoding && $self->{'verbosity'}) {
            print $outhandle "$plugin_name: WARNING: $file was read using $encoding encoding but ";
            print $outhandle "appears to be encoded as $extracted_encoding.\n";
        }

    } else {
        $language = $self->{'default_language'};
        $encoding = $self->{'input_encoding'};
    }

    # create a new document
    my $doc_obj = doc->new ($filename, "indexed_doc");
    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language);
    $doc_obj->set_source_encoding ($encoding);

    # read in file ($text will be in utf8)
    my $text = "";
    $self->read_file ($filename, $encoding, \$text);

    if (!length ($text)) {
        print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
        return 0;
    }

    # include any metadata passed in from previous plugins
    # note that this metadata is associated with the top level section
    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);

    # do plugin specific processing of doc_obj
    return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));

    # do any automatic metadata extraction
    $self->auto_extract_metadata ($doc_obj);

    # add an OID
    $doc_obj->set_OID();

    # process the document
    $processor->process($doc_obj);

    return 1; # processed the file
}
353
# Plugin specific processing hook, called by read() with
# ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj).
# A sub-class implementation returns undef if the file is rejected by
# the plugin. The base class version always dies, so it must be
# overridden.
sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    die "Basplug::process function must be implemented in sub-class\n";
}
363
# Uses the multiread package to read in the entire file pointed to
# by filename and loads the resulting text into $$textref. Input text
# may be in any of the encodings handled by multiread, output text
# will be in utf8.
#
#   $filename -- file to read
#   $encoding -- "ascii" slurps the file as is; anything else is handed
#                to multiread ("gb" text is additionally segmented by cnseg)
#   $textref  -- reference to the scalar that receives the text
#
# If the file is not readable a message is printed (when verbosity is
# set) and $$textref is left untouched. Dies if the file cannot be opened.
sub read_file {
    my $self = shift (@_);
    my ($filename, $encoding, $textref) = @_;

    if (!-r $filename) {
        my $outhandle = $self->{'outhandle'};
        print $outhandle "Read permission denied for $filename\n" if $self->{'verbosity'};
        return;
    }

    $$textref = "";

    # Three-argument open takes the filename literally; the two-argument
    # form strips surrounding whitespace and interprets mode characters.
    # The bareword handle is kept because multiread is told below to read
    # from it by name.
    open (FILE, "<", $filename)
        or die "BasPlug::read_file could not open $filename for reading ($!)\n";

    if ($encoding eq "ascii") {
        # slurp the whole file; local restores the caller's $/ afterwards
        local $/;
        $$textref = <FILE>;
    } else {
        my $reader = multiread->new ();
        $reader->set_handle ('BasPlug::FILE');
        $reader->set_encoding ($encoding);
        $reader->read_file ($textref);

        if ($encoding eq "gb") {
            # segment the Chinese words
            $$textref = &cnseg::segment($$textref);
        }
    }

    close FILE;
}
401
# Uses textcat to work out the encoding and language of the text in
# $filename. All html tags are removed before processing.
#
# Returns a two element list: (language, encoding).
#
# When textcat does not return exactly one result, the default language
# is returned together with the configured input_encoding (or with
# default_encoding when input_encoding is 'auto'). An encoding that is
# not a key of %supported_encodings is replaced by default_encoding.
#
# Dies if the file cannot be opened or if the language reported by
# textcat has no entry in %iso639::toiso639.
sub get_language_encoding {
    my $self = shift (@_);
    my ($filename) = @_;
    my $outhandle = $self->{'outhandle'};

    # read in file: lexical handle, literal filename (three-argument open)
    # and a localised $/ so the caller's record separator is preserved
    open (my $fh, "<", $filename)
        or die "BasPlug::get_language_encoding could not open $filename for reading ($!)\n";
    my $text = do { local $/; <$fh> };
    close ($fh);

    # remove all HTML tags
    $text =~ s/<[^>]*>//sg;

    # get the language/encoding
    my @results = textcat::classify($text);

    if (scalar @results != 1) {
        if ($self->{'input_encoding'} ne 'auto') {
            if ($self->{'extract_language'} && $self->{'verbosity'}) {
                print $outhandle "BasPlug: WARNING: language could not be extracted from $filename - ";
                print $outhandle "defaulting to $self->{'default_language'}\n";
            }
            return ($self->{'default_language'}, $self->{'input_encoding'});

        } else {
            if ($self->{'verbosity'}) {
                print $outhandle "BasPlug: WARNING: language/encoding could not be extracted from $filename - ";
                print $outhandle "defaulting to $self->{'default_language'}/$self->{'default_encoding'}\n";
            }
            return ($self->{'default_language'}, $self->{'default_encoding'});
        }
    }

    # format language/encoding: the result is split at its first "-" into
    # a language part and an optional encoding part
    my ($language, $encoding) = $results[0] =~ /^([^-]*)(?:-(.*))?$/;
    $language = $iso639::toiso639{lc($language)};
    die "Invalid language\n" if !defined $language;

    if (!defined $encoding) {
        # if textcat returned no encoding info it is assumed to be iso_8859_1
        $encoding = "iso_8859_1";
    } else {
        # convert to the format we expect
        $encoding =~ s/windows/windows_/;
        $encoding =~ s/iso8859/iso_8859/;
        $encoding =~ s/^gb.*$/gb/;
    }

    if (!defined $supported_encodings{$encoding}) {
        if ($self->{'verbosity'}) {
            print $outhandle "BasPlug: WARNING: $filename appears to be encoded in an unsupported encoding ($encoding) - ";
            print $outhandle "using $self->{'default_encoding'}\n";
        }
        $encoding = $self->{'default_encoding'};
    }

    return ($language, $encoding);
}
469
# Attach the metadata that has been passed along from one plugin to
# another to section $cursection of $doc_obj.
# Values are added with add_utf8_metadata, so they are expected to
# already be in utf8. A value may be a plain scalar or an array
# reference; each element of an array is added as a separate value.
sub extra_metadata {
    my ($self, $doc_obj, $cursection, $metadata) = @_;

    foreach my $field (keys(%$metadata)) {
        my $value = $metadata->{$field};
        my @values = (ref ($value) eq "ARRAY") ? @$value : ($value);

        foreach my $item (@values) {
            $doc_obj->add_utf8_metadata ($cursection, $field, $item);
        }
    }
}
489
# Initialise the metadata extractors. The acronym extractor is the only
# one set up here, and only when acronyms are to be extracted or marked up.
sub initialise_extractors {
    my ($self) = @_;

    my $uses_acronyms = $self->{'extract_acronyms'} || $self->{'markup_acronyms'};
    if ($uses_acronyms) {
        &acronym::initialise_acronyms();
    }
}
498
# Finalise the metadata extractors. The acronym extractor is the only
# one shut down here, and only when acronyms were extracted or marked up.
sub finalise_extractors {
    my ($self) = @_;

    my $uses_acronyms = $self->{'extract_acronyms'} || $self->{'markup_acronyms'};
    if ($uses_acronyms) {
        &acronym::finalise_acronyms();
    }
}
507
# FIRSTNNN: extract the first NNN characters as metadata
#
#   $textref     -- reference to the section text
#   $doc_obj     -- document that receives the metadata
#   $thissection -- section the metadata is attached to
#
# For every size in the comma separated 'first' option a "First<size>"
# metadata value is added. Leading and trailing whitespace is removed
# and runs of whitespace are collapsed to a single space before cutting.
# When the text had to be cut, the trailing (possibly partial) word is
# replaced by an ellipsis entity. Text that already fits is stored
# unchanged; previously its last word was replaced by the ellipsis too.
sub extract_first_NNNN_characters {
    my $self = shift (@_);
    my ($textref, $doc_obj, $thissection) = @_;

    # the normalised text is the same for every size, so build it once
    my $clean = $$textref;
    $clean =~ s/^\s+//;
    $clean =~ s/\s+$//;
    $clean =~ s/\s+/ /gs;

    foreach my $size (split /,/, $self->{'first'}) {
        my $tmptext = substr ($clean, 0, $size);

        # only mark the cut when something was actually removed
        if (length ($tmptext) < length ($clean)) {
            $tmptext =~ s/\s\S*$/&#8230;/;
        }

        $doc_obj->add_utf8_metadata ($thissection, "First$size", $tmptext);
    }
}
523
# Extract email addresses from the section text and add each distinct
# address as "emailAddress" metadata, in sorted order.
#
#   $textref     -- reference to the section text
#   $doc_obj     -- document that receives the metadata
#   $thissection -- section the metadata is attached to
#
# Progress is reported to outhandle based on the verbosity.
#
# Duplicates are tracked with a hash of exact addresses. The previous
# approach matched each address as a regular expression against the
# joined list of earlier addresses, which dropped an address that was a
# substring of an earlier one (bob@x.com after abob@x.com) and failed to
# recognise repeats of addresses containing "+".
sub extract_email {
    my $self = shift (@_);
    my ($textref, $doc_obj, $thissection) = @_;
    my $outhandle = $self->{'outhandle'};

    print $outhandle " extracting email addresses ...\n"
        if ($self->{'verbosity'} > 2);

    my @email = ($$textref =~ m/([-a-z0-9\.@+_=]+@(?:[-a-z0-9]+\.)+(?:com|org|edu|mil|int|[a-z][a-z]))/g);

    my %seen = ();
    foreach my $address (sort @email) {
        next if $seen{$address}++;

        $doc_obj->add_utf8_metadata ($thissection, "emailAddress", $address);
        print $outhandle " extracting $address\n"
            if ($self->{'verbosity'} > 3);
    }

    print $outhandle " done extracting email addresses.\n"
        if ($self->{'verbosity'} > 2);
}
548
# Run the automatic metadata extraction selected by the general options
# over every section of $doc_obj. Each enabled extractor makes its own
# pass over the sections, in this order: email addresses, FirstNNN,
# acronym extraction, acronym markup, dates.
sub auto_extract_metadata {
    my ($self, $doc_obj) = @_;

    # Calls $visitor->($section, $text) for each section, walking from
    # the top section with get_next_section until it returns undef.
    my $each_section = sub {
        my ($visitor) = @_;
        for (my $section = $doc_obj->get_top_section();
             defined $section;
             $section = $doc_obj->get_next_section ($section)) {
            $visitor->($section, $doc_obj->get_text($section));
        }
    };

    if ($self->{'extract_email'}) {
        $each_section->(sub {
            my ($section, $text) = @_;
            $self->extract_email (\$text, $doc_obj, $section) if $text =~ /./;
        });
    }

    if ($self->{'first'}) {
        $each_section->(sub {
            my ($section, $text) = @_;
            $self->extract_first_NNNN_characters (\$text, $doc_obj, $section) if $text =~ /./;
        });
    }

    if ($self->{'extract_acronyms'}) {
        $each_section->(sub {
            my ($section, $text) = @_;
            $self->extract_acronyms (\$text, $doc_obj, $section) if $text =~ /./;
        });
    }

    if ($self->{'markup_acronyms'}) {
        # the marked up text replaces the section's text
        $each_section->(sub {
            my ($section, $text) = @_;
            my $marked = $self->markup_acronyms ($text, $doc_obj, $section);
            $doc_obj->delete_text($section);
            $doc_obj->add_text($section, $marked);
        });
    }

    if ($self->{'date_extract'}) {
        $each_section->(sub {
            my ($section, $text) = @_;
            &DateExtract::get_date_metadata($text, $doc_obj,
                                            $section,
                                            $self->{'no_biblio'},
                                            $self->{'max_year'},
                                            $self->{'max_century'});
        });
    }
}
605
# Extract acronyms from a section in a document. Progress is reported to
# outhandle based on the verbosity. Every acronym whose string form is
# not already present in the section's "Acronym" metadata is written to
# file (via its write_to_file method) and added as "Acronym" metadata.
sub extract_acronyms {
    my ($self, $textref, $doc_obj, $thissection) = @_;
    my $outhandle = $self->{'outhandle'};

    print $outhandle " extracting acronyms ...\n"
        if ($self->{'verbosity'} > 2);

    my $found = &acronym::acronyms($textref);

    foreach my $acro (@$found) {
        my $label = $acro->to_string();

        # compare against what has been recorded for this section so far
        my $already_recorded = 0;
        my $recorded = $doc_obj->get_metadata($thissection, "Acronym");
        foreach my $earlier (@$recorded) {
            next unless $earlier eq $label;

            $already_recorded = 1;
            print $outhandle " already seen ". $label . "\n"
                if ($self->{'verbosity'} >= 4);
        }
        next if $already_recorded;

        # first occurrence: write it to the file ...
        $acro->write_to_file();

        # ... and record the normal acronym
        $doc_obj->add_utf8_metadata($thissection, "Acronym", $label);
        print $outhandle " adding ". $label . "\n"
            if ($self->{'verbosity'} > 3);
    }

    print $outhandle " done extracting acronyms. \n"
        if ($self->{'verbosity'} > 2);
}
647
# Pass $text through acronym::markup_acronyms and return the result.
# Progress is reported to outhandle based on the verbosity. $doc_obj and
# $thissection are accepted but not used here.
sub markup_acronyms {
    my ($self, $text, $doc_obj, $thissection) = @_;
    my $outhandle = $self->{'outhandle'};

    if ($self->{'verbosity'} > 2) {
        print $outhandle " marking up acronyms ...\n";
    }

    # self is passed in to check for verbosity ...
    my $marked = &acronym::markup_acronyms($text, $self);

    if ($self->{'verbosity'} > 2) {
        print $outhandle " done marking up acronyms. \n";
    }

    return $marked;
}
664
6651;
Note: See TracBrowser for help on using the repository browser.