root/gsdl/trunk/perllib/plugins/AcronymExtractor.pm @ 15887

Revision 15887, 3.9 KB (checked in by mdewsnip, 12 years ago)

Added "use strict" to the few files that were missing it, and fixed the resulting problems in MediaWikiPlug.pm.

  • Property svn:executable set to *
Line 
1package AcronymExtractor;
2
3use acronym;
4use PrintInfo;
5use strict;
6
# Set up inheritance at compile time: AcronymExtractor derives from
# PrintInfo.
BEGIN {
    @AcronymExtractor::ISA = qw(PrintInfo);
}
10
# Argument specifications contributed by this module: two optional
# flags, extract_acronyms and markup_acronyms.  The 'desc' values are
# brace-wrapped keys (presumably resolved to display text elsewhere --
# confirm against the string resources).
my $arguments = [
    {
        'name' => 'extract_acronyms',
        'desc' => '{AcronymExtractor.extract_acronyms}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'markup_acronyms',
        'desc' => '{AcronymExtractor.markup_acronyms}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];

# Option block describing this module; it carries the argument
# specifications above under 'args'.
my $options = {
    'name'     => 'AcronymExtractor',
    'desc'     => '{AcronymExtractor.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
26
27
# Constructor.
#
#   $class           - class name the new object is blessed into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - handed unchanged to the PrintInfo constructor
#   $hashArgOptLists - hash ref; this module's argument specifications
#                      are appended to its "ArgList" entry and its
#                      option block to its "OptList" entry
#
# Returns the object built by PrintInfo, blessed into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Explicit class-method call rather than the indirect object form
    # ("new PrintInfo(...)"), which perl can mis-parse -- particularly
    # inside a package that defines its own "new".  The trailing 1 is
    # passed through as PrintInfo's fourth constructor argument; its
    # meaning is defined by PrintInfo.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
41
42
# Initialise the acronym support code.  Nothing happens unless at least
# one of the extract_acronyms / markup_acronyms options is set on the
# object.
sub initialise_acronym_extractor {
    my ($self) = @_;

    my $acronyms_enabled = $self->{'extract_acronyms'} || $self->{'markup_acronyms'};
    &acronym::initialise_acronyms() if $acronyms_enabled;
}
51
# Finalise the acronym support code.  Nothing happens unless at least
# one of the extract_acronyms / markup_acronyms options is set on the
# object.
sub finalise_acronym_extractor {
    my ($self) = @_;

    my $acronyms_enabled = $self->{'extract_acronyms'} || $self->{'markup_acronyms'};
    &acronym::finalise_acronyms() if $acronyms_enabled;
}
60
# Walk every section of a document and apply the enabled acronym
# processing steps.
#
#   $doc_obj - document object; sections are visited from
#              get_top_section() via get_next_section() until it
#              returns undef
#
# When extract_acronyms is set, each section whose text contains at
# least one non-newline character is passed to extract_acronyms().
# When markup_acronyms is set, each section's text is replaced by the
# result of markup_acronyms().  Extraction of all sections finishes
# before any markup is applied.
sub extract_acronym_metadata {
    my ($self, $doc_obj) = @_;

    if ($self->{'extract_acronyms'}) {
        for (my $section = $doc_obj->get_top_section();
             defined $section;
             $section = $doc_obj->get_next_section($section)) {
            my $section_text = $doc_obj->get_text($section);

            # sections with no text content are not worth scanning
            next unless $section_text =~ /./;
            $self->extract_acronyms(\$section_text, $doc_obj, $section);
        }
    }

    if ($self->{'markup_acronyms'}) {
        for (my $section = $doc_obj->get_top_section();
             defined $section;
             $section = $doc_obj->get_next_section($section)) {
            my $section_text = $doc_obj->get_text($section);
            $section_text = $self->markup_acronyms($section_text, $doc_obj, $section);

            # swap the section's text for the marked-up version
            $doc_obj->delete_text($section);
            $doc_obj->add_text($section, $section_text);
        }
    }

}
89
90
91
# Extract acronyms from one section of a document and record each
# previously unseen one as "Acronym" metadata on that section.
# Progress messages go to the object's outhandle, gated on its
# verbosity setting.
#
#   $textref     - reference to the section text, handed to
#                  acronym::acronyms()
#   $doc_obj     - document object; its get_metadata() and
#                  add_utf8_metadata() methods are used
#   $thissection - section whose "Acronym" metadata is read and extended
#
# NOTE(review): gsprintf is called unqualified, but none of the use
# lines visible in this file import it -- confirm it is in scope.
sub extract_acronyms {
    my $self = shift(@_);
    my ($textref, $doc_obj, $thissection) = @_;
    my $outhandle = $self->{'outhandle'};

    gsprintf($outhandle, " {BasPlug.extracting_acronyms}...\n")
        if ($self->{'verbosity'} > 2);

    my $acro_array = &acronym::acronyms($textref);

    foreach my $acro (@$acro_array) {
        # Render once and reuse; assumes to_string() yields the same
        # value before and after write_to_file().
        my $acro_string = $acro->to_string();

        # Re-read the metadata on every pass: acronyms added earlier in
        # this loop must count as already seen.
        my $seen_before = 0;
        my $previous_data = $doc_obj->get_metadata($thissection, "Acronym");
        foreach my $thisAcro (@$previous_data) {
            next unless $thisAcro eq $acro_string;

            $seen_before = 1;
            gsprintf($outhandle, " {BasPlug.already_seen} " . $acro_string . "\n")
                if ($self->{'verbosity'} >= 4);
            last;    # one match is enough
        }
        next if $seen_before;

        # first sighting: write it to the file, then add the metadata
        $acro->write_to_file();
        $doc_obj->add_utf8_metadata($thissection, "Acronym", $acro_string);
        gsprintf($outhandle, " {BasPlug.adding} " . $acro_string . "\n")
            if ($self->{'verbosity'} > 3);
    }

    gsprintf($outhandle, " {BasPlug.done_acronym_extract}\n")
        if ($self->{'verbosity'} > 2);
}
136
# Mark up the acronyms in a piece of text and return the result.
# Progress messages go to the object's outhandle when verbosity is
# above 2.
#
#   $text        - text to mark up
#   $doc_obj     - accepted but not used here
#   $thissection - accepted but not used here
#
# Returns whatever acronym::markup_acronyms() produces for $text.
sub markup_acronyms {
    my ($self, $text, $doc_obj, $thissection) = @_;
    my $out = $self->{'outhandle'};

    if ($self->{'verbosity'} > 2) {
        gsprintf($out, " {BasPlug.marking_up_acronyms}...\n");
    }

    # $self goes along so the callee can check verbosity (per the
    # original author's note)
    my $marked_up = &acronym::markup_acronyms($text, $self);

    if ($self->{'verbosity'} > 2) {
        gsprintf($out, " {BasPlug.done_acronym_markup}\n");
    }

    return $marked_up;
}
153
1541;
Note: See TracBrowser for help on using the browser.