source: gsdl/trunk/perllib/plugins/EmailAddressExtractor.pm@ 15887

Last change on this file since 15887 was 15887, checked in by mdewsnip, 16 years ago

Added "use strict" to the few files that were missing it, and fixing resulting problems in MediaWikiPlug.pm.

  • Property svn:executable set to *
File size: 2.5 KB
Line 
1package EmailAddressExtractor;
2
3use PrintInfo;
4use strict;
5
6BEGIN {
7 @EmailAddressExtractor::ISA = ('PrintInfo');
8}
9
10my $arguments = [
11 { 'name' => "extract_email",
12 'desc' => "{EmailAddressExtractor.extract_email}",
13 'type' => "flag",
14 'reqd' => "no" },
15 { 'name' => "new_extract_email",
16 'desc' => "",
17 'type' => "flag",
18 'reqd' => "no",
19 'hiddengli' => "yes" }
20 ];
21
22my $options = { 'name' => "EmailAddressExtractor",
23 'desc' => "{EmailAddressExtractor.desc}",
24 'abstract' => "yes",
25 'inherits' => "yes",
26 'args' => $arguments };
27
28
29sub new {
30 my ($class) = shift (@_);
31 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
32 push(@$pluginlist, $class);
33
34 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
35 push(@{$hashArgOptLists->{"OptList"}},$options);
36
37 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists, 1);
38
39 return bless $self, $class;
40
41}
42
43# extract metadata
44sub extract_email_metadata {
45
46 my $self = shift (@_);
47 my ($doc_obj) = @_;
48
49 if ($self->{'extract_email'}) {
50 my $thissection = $doc_obj->get_top_section();
51 while (defined $thissection) {
52 my $text = $doc_obj->get_text($thissection);
53 $self->extract_email (\$text, $doc_obj, $thissection) if $text =~ /./;
54 $thissection = $doc_obj->get_next_section ($thissection);
55 }
56 }
57
58}
59
60sub extract_email {
61 my $self = shift (@_);
62 my ($textref, $doc_obj, $thissection) = @_;
63 my $outhandle = $self->{'outhandle'};
64
65 gsprintf($outhandle, " {BasPlug.extracting_emails}...\n")
66 if ($self->{'verbosity'} > 2);
67
68 my @email = ($$textref =~ m/([-a-z0-9\.@+_=]+@(?:[-a-z0-9]+\.)+(?:com|org|edu|mil|int|net|[a-z][a-z]))/g);
69 @email = sort @email;
70
71# if($self->{"new_extract_email"} == 0)
72# {
73# my @email2 = ();
74# foreach my $address (@email)
75# {
76# if (!(join(" ",@email2) =~ m/(^| )$address( |$)/ ))
77# {
78# push @email2, $address;
79# $doc_obj->add_utf8_metadata ($thissection, "emailAddress", $address);
80# # print $outhandle " extracting $address\n"
81# &gsprintf($outhandle, " {BasPlug.extracting} $address\n")
82# if ($self->{'verbosity'} > 3);
83# }
84# }
85# }
86# else
87# {
88 my $hashExistMail = {};
89 foreach my $address (@email) {
90 if (!(defined $hashExistMail->{$address}))
91 {
92 $hashExistMail->{$address} = 1;
93 $doc_obj->add_utf8_metadata ($thissection, "emailAddress", $address);
94 gsprintf($outhandle, " {BasPlug.extracting} $address\n")
95 if ($self->{'verbosity'} > 3);
96 }
97 }
98 gsprintf($outhandle, " {BasPlug.done_email_extract}\n")
99 if ($self->{'verbosity'} > 2);
100}
101
102
1031;
Note: See TracBrowser for help on using the repository browser.