source: gsdl/trunk/perllib/plugins/AcronymExtractor.pm@ 15887

Last change on this file since 15887 was 15887, checked in by mdewsnip, 13 years ago

Added "use strict" to the few files that were missing it, and fixed the resulting problems in MediaWikiPlug.pm.

  • Property svn:executable set to *
File size: 3.9 KB
Line 
1package AcronymExtractor;
2
3use acronym;
4use PrintInfo;
5use strict;
6
# Make PrintInfo the parent class. This runs at compile time, so the
# inheritance chain is in place before any of the code below is executed.
BEGIN {
    @AcronymExtractor::ISA = qw(PrintInfo);
}
10
# Plugin arguments contributed by this class: two optional flags. The 'desc'
# values are "{...}" keys rather than literal descriptions.
my $arguments = [
    {
        'name' => 'extract_acronyms',
        'desc' => '{AcronymExtractor.extract_acronyms}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'markup_acronyms',
        'desc' => '{AcronymExtractor.markup_acronyms}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];

# Option record describing this class; it carries the argument list above
# under the 'args' key.
my $options = {
    'name'     => 'AcronymExtractor',
    'desc'     => '{AcronymExtractor.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
26
27
# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the PrintInfo constructor
#   $hashArgOptLists - hash ref; this class's argument definitions are appended
#                      to its "ArgList" entry and its option record to its
#                      "OptList" entry
#
# Returns the object built by PrintInfo, re-blessed into $class.
sub new {
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) {
        push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    }
    if (defined $options) {
        push(@{ $hashArgOptLists->{"OptList"} }, $options);
    }

    # Use an explicit class-method call. The indirect-object form
    # ("new PrintInfo(...)") is resolved by parser heuristics and is fragile
    # in a package that defines its own new(), as this one does.
    # NOTE(review): the meaning of the trailing 1 is defined by PrintInfo::new,
    # which is not visible here.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
41
42
# Initialise the acronym module, but only when this object has been asked to
# extract acronyms or to mark them up.
sub initialise_acronym_extractor {
    my ($self) = @_;

    my $acronyms_wanted = $self->{'extract_acronyms'} || $self->{'markup_acronyms'};
    acronym::initialise_acronyms() if $acronyms_wanted;
}
51
# Finalise the acronym module, but only when this object has been asked to
# extract acronyms or to mark them up.
sub finalise_acronym_extractor {
    my ($self) = @_;

    my $acronyms_wanted = $self->{'extract_acronyms'} || $self->{'markup_acronyms'};
    acronym::finalise_acronyms() if $acronyms_wanted;
}
60
# Run the enabled acronym passes over every section of a document.
#
#   $doc_obj - document object; its sections are visited in the order given
#              by get_top_section() / get_next_section()
#
# With 'extract_acronyms' set, every section whose text matches /./ is passed
# (by reference) to $self->extract_acronyms. With 'markup_acronyms' set, the
# text of every section is replaced by the result of $self->markup_acronyms.
sub extract_acronym_metadata {
    my ($self, $doc_obj) = @_;

    # Pass 1: collect acronym metadata.
    if ($self->{'extract_acronyms'}) {
        for (my $section = $doc_obj->get_top_section();
             defined $section;
             $section = $doc_obj->get_next_section($section))
        {
            my $section_text = $doc_obj->get_text($section);
            next unless $section_text =~ /./;
            $self->extract_acronyms(\$section_text, $doc_obj, $section);
        }
    }

    # Pass 2: rewrite each section's text with acronyms marked up.
    if ($self->{'markup_acronyms'}) {
        for (my $section = $doc_obj->get_top_section();
             defined $section;
             $section = $doc_obj->get_next_section($section))
        {
            my $plain_text  = $doc_obj->get_text($section);
            my $marked_text = $self->markup_acronyms($plain_text, $doc_obj, $section);
            $doc_obj->delete_text($section);
            $doc_obj->add_text($section, $marked_text);
        }
    }

}
89
90
91
# Extract acronyms from one section of a document and record each new one as
# "Acronym" metadata on that section.
#
#   $textref     - reference to the section text; handed to acronym::acronyms()
#   $doc_obj     - document object whose section metadata is read and updated
#   $thissection - identifier of the section being processed
#
# An acronym whose string form already appears in the section's "Acronym"
# metadata is skipped. Every other acronym is written out through its
# write_to_file() method and then added as "Acronym" metadata. Progress is
# reported to $self->{'outhandle'} according to $self->{'verbosity'}.
sub extract_acronyms {
    my $self = shift(@_);
    my ($textref, $doc_obj, $thissection) = @_;
    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    gsprintf($outhandle, " {BasPlug.extracting_acronyms}...\n")
        if ($verbosity > 2);

    my $acro_array = acronym::acronyms($textref);

    foreach my $acro (@$acro_array) {
        # The string form does not change while scanning the existing
        # metadata, so compute it once per acronym.
        my $acro_string = $acro->to_string();

        # check that this is the first time ...
        my $seen_before   = 0;
        my $previous_data = $doc_obj->get_metadata($thissection, "Acronym");
        foreach my $thisAcro (@$previous_data) {
            next unless $thisAcro eq $acro_string;

            $seen_before = 1;
            if ($verbosity >= 4) {
                gsprintf($outhandle, " {BasPlug.already_seen} " .
                         $acro_string . "\n");
            }
            last;    # one match is enough to know it is a duplicate
        }
        next if $seen_before;

        # write it to the file ...
        $acro->write_to_file();

        # do the normal acronym
        $doc_obj->add_utf8_metadata($thissection, "Acronym", $acro_string);
        gsprintf($outhandle, " {BasPlug.adding} " . $acro_string . "\n")
            if ($verbosity > 3);
    }

    gsprintf($outhandle, " {BasPlug.done_acronym_extract}\n")
        if ($verbosity > 2);
}
136
# Return $text with its acronyms marked up by acronym::markup_acronyms().
#
#   $text        - section text to mark up
#   $doc_obj     - document object (not used in this routine)
#   $thissection - section identifier (not used in this routine)
#
# Start and finish messages go to $self->{'outhandle'} when
# $self->{'verbosity'} is greater than 2.
sub markup_acronyms {
    my ($self, $text, $doc_obj, $thissection) = @_;
    my $outhandle = $self->{'outhandle'};
    my $report    = $self->{'verbosity'} > 2;

    gsprintf($outhandle, " {BasPlug.marking_up_acronyms}...\n") if $report;

    # $self goes along so the acronym module can check the verbosity.
    my $marked_text = acronym::markup_acronyms($text, $self);

    gsprintf($outhandle, " {BasPlug.done_acronym_markup}\n") if $report;

    return $marked_text;
}
153
1541;
Note: See TracBrowser for help on using the repository browser.