source: gsdl/trunk/perllib/plugins/AcronymExtractor.pm@ 15918

Last change on this file since 15918 was 15918, checked in by kjdon, 16 years ago

tidied up new method to match other plugins

  • Property svn:executable set to *
File size: 3.9 KB
RevLine 
[15867]1package AcronymExtractor;
2
3use acronym;
4use PrintInfo;
[15887]5use strict;
[15867]6
# Set up inheritance at compile time: AcronymExtractor is a PrintInfo.
BEGIN {
    @AcronymExtractor::ISA = qw(PrintInfo);
}
10
# Argument descriptions contributed by this module. Both are optional flags;
# new() appends this list to the caller-supplied "ArgList".
# NOTE(review): the brace-wrapped 'desc' values look like keys into a
# string resource file rather than literal descriptions - confirm.
my $arguments = [
    {
        'name' => 'extract_acronyms',
        'desc' => '{AcronymExtractor.extract_acronyms}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'markup_acronyms',
        'desc' => '{AcronymExtractor.markup_acronyms}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];
20
# Option record describing this module; new() appends it to the
# caller-supplied "OptList". 'args' points at the $arguments list above.
my $options = {
    'name'     => 'AcronymExtractor',
    'desc'     => '{AcronymExtractor.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
26
27
# Constructor.
#
# Registers this class and its argument/option descriptions with the
# caller-supplied accumulators, then lets PrintInfo build the object.
#
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through to PrintInfo->new unchanged
#   $hashArgOptLists - hash ref; its "ArgList" array gets this module's
#                      arguments and its "OptList" array gets $options
#
# Returns the object built by PrintInfo, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call rather than the indirect-object form
    # ("new PrintInfo(...)"), whose parse depends on which names perl has
    # already seen. The trailing 1 is handed to PrintInfo as-is.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
41
42
43# initialise metadata extractors
sub initialise_acronym_extractor {
    my ($self) = @_;

    # The acronym module only needs setting up when at least one of the
    # two acronym features has been switched on.
    my $acronyms_wanted = $self->{'extract_acronyms'} || $self->{'markup_acronyms'};

    &acronym::initialise_acronyms() if $acronyms_wanted;
}
51
52# finalise metadata extractors
sub finalise_acronym_extractor {
    my ($self) = @_;

    # Mirror of initialise_acronym_extractor: only tear down the acronym
    # module when one of the acronym features was enabled.
    my $acronyms_wanted = $self->{'extract_acronyms'} || $self->{'markup_acronyms'};

    &acronym::finalise_acronyms() if $acronyms_wanted;
}
60
61# extract metadata
sub extract_acronym_metadata {
    my ($self, $doc_obj) = @_;

    # First pass (only with extract_acronyms set): visit every section from
    # the top one onwards and extract acronyms from any section whose text
    # contains at least one character matched by /./.
    if ($self->{'extract_acronyms'}) {
        for (my $section = $doc_obj->get_top_section();
             defined $section;
             $section = $doc_obj->get_next_section($section)) {

            my $text = $doc_obj->get_text($section);
            next unless $text =~ /./;
            $self->extract_acronyms(\$text, $doc_obj, $section);
        }
    }

    # Second pass (only with markup_acronyms set): replace each section's
    # text with its marked-up version. This runs after all extraction so
    # extraction always sees the unmodified text.
    if ($self->{'markup_acronyms'}) {
        for (my $section = $doc_obj->get_top_section();
             defined $section;
             $section = $doc_obj->get_next_section($section)) {

            my $marked_up = $self->markup_acronyms($doc_obj->get_text($section),
                                                   $doc_obj, $section);
            $doc_obj->delete_text($section);
            $doc_obj->add_text($section, $marked_up);
        }
    }
}
89
90
91
# extract acronyms from a section in a document. progress is
# reported to outhandle based on the verbosity. only the Acronym
# metadata item is created here; no AcronymKWIC metadata is added.
95
# Extract acronyms from one section's text and record each new one as
# "Acronym" metadata on that section.
#
#   $textref     - reference to the section text, handed to acronym::acronyms
#   $doc_obj     - document object; must provide get_metadata and
#                  add_utf8_metadata
#   $thissection - identifier of the section being processed
#
# An acronym whose string form is already present in the section's
# "Acronym" metadata is skipped. Progress messages go to
# $self->{'outhandle'}, with more detail at higher $self->{'verbosity'}.
#
# NOTE(review): gsprintf is called unqualified, but no import of it is
# visible in this file - confirm it is in scope for this package.
sub extract_acronyms {
    my $self = shift (@_);
    my ($textref, $doc_obj, $thissection) = @_;
    my $outhandle = $self->{'outhandle'};

    gsprintf($outhandle, " {BasPlug.extracting_acronyms}...\n")
        if ($self->{'verbosity'} > 2);

    my $acro_array = &acronym::acronyms($textref);

    foreach my $acro (@$acro_array) {
        # The string form is what gets compared, logged and stored, so
        # compute it once per acronym.
        my $acro_string = $acro->to_string();

        # Check that this is the first time we have seen it. The metadata
        # is re-read for every acronym because earlier iterations of this
        # loop may have added to it.
        my $already_seen = 0;
        my $previous_data = $doc_obj->get_metadata($thissection, "Acronym");
        foreach my $thisAcro (@$previous_data) {
            if ($thisAcro eq $acro_string) {
                $already_seen = 1;
                if ($self->{'verbosity'} >= 4) {
                    gsprintf($outhandle, " {BasPlug.already_seen} " .
                             $acro_string . "\n");
                }
            }
        }

        if (!$already_seen) {
            # write it to the file ...
            $acro->write_to_file();

            # do the normal acronym
            $doc_obj->add_utf8_metadata($thissection, "Acronym", $acro_string);
            gsprintf($outhandle, " {BasPlug.adding} " . $acro_string . "\n")
                if ($self->{'verbosity'} > 3);
        }
    }

    gsprintf($outhandle, " {BasPlug.done_acronym_extract}\n")
        if ($self->{'verbosity'} > 2);
}
136
# Return $text with acronyms marked up by acronym::markup_acronyms.
# $doc_obj and $thissection are accepted but not used here. Progress is
# reported to $self->{'outhandle'} when $self->{'verbosity'} exceeds 2.
sub markup_acronyms {
    my ($self, $text, $doc_obj, $thissection) = @_;

    my $outhandle = $self->{'outhandle'};

    if ($self->{'verbosity'} > 2) {
        gsprintf($outhandle, " {BasPlug.marking_up_acronyms}...\n");
    }

    # $self goes along so the acronym module can consult the verbosity.
    my $marked_up = &acronym::markup_acronyms($text, $self);

    if ($self->{'verbosity'} > 2) {
        gsprintf($outhandle, " {BasPlug.done_acronym_markup}\n");
    }

    return $marked_up;
}
153
1541;
Note: See TracBrowser for help on using the repository browser.