source: gsdl/trunk/perllib/plugins/DateExtractor.pm@ 15867

Last change on this file since 15867 was 15867, checked in by kjdon, 16 years ago

plugin overhaul: automatic metadata extraction moved out of BasPlug into several extractor plugins (Keyphrase, Date, Acronym, EmailAddress Extractors). These are used by the AutoExtractMetadata plugin to add this functionality to BasePlugin (using multiple inheritance)

  • Property svn:executable set to *
File size: 1.7 KB
Line 
package DateExtractor;

use strict;
use warnings;

use DateExtract;
use PrintInfo;
5
# Set up inheritance at compile time: DateExtractor derives from PrintInfo.
BEGIN {
    @DateExtractor::ISA = ('PrintInfo');
}
9
# Descriptions of the options this extractor accepts. Each entry is a hash
# with a 'name', a 'desc' (a "{DateExtractor.xxx}" key -- presumably looked
# up in a string resource elsewhere; confirm against PrintInfo), a 'type',
# an optional default ('deft') and a 'reqd' marker.
my $arguments = [
    { 'name' => "extract_historical_years",
      'desc' => "{DateExtractor.extract_historical_years}",
      'type' => "flag",
      'reqd' => "no" },
    { 'name' => "maximum_year",
      'desc' => "{DateExtractor.maximum_year}",
      'type' => "int",
      # Defaults to the current calendar year; note this is evaluated once,
      # when the file is loaded, not each time a plugin object is built.
      'deft' => (localtime)[5]+1900,
      'char_length' => "4",
      #'range' => "2,100",
      'reqd' => "no" },
    { 'name' => "maximum_century",
      'desc' => "{DateExtractor.maximum_century}",
      'type' => "string",
      'deft' => "-1",
      'reqd' => "no" },
    { 'name' => "no_bibliography",
      'desc' => "{DateExtractor.no_bibliography}",
      'type' => "flag",
      'reqd' => "no" },
];
32
# Option block for this class: its name, description key, the 'abstract'
# and 'inherits' markers, and the argument descriptions declared above.
# The constructor appends this block to the caller's "OptList".
my $options = { 'name'     => "DateExtractor",
                'desc'     => "{DateExtractor.desc}",
                'abstract' => "yes",
                'inherits' => "yes",
                'args'     => $arguments };
38
39
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class's name is appended to it
#   $inputargs       - handed on unchanged to the PrintInfo constructor
#   $hashArgOptLists - hash ref; this class's argument descriptions are
#                      appended to its "ArgList" entry and its option block
#                      to its "OptList" entry
#
# Returns the object produced by the PrintInfo constructor, blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    # Register this class and its options before delegating, so the parent
    # constructor receives them along with everything the caller supplied.
    push(@$pluginlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit arrow call instead of the indirect-object form
    # "new PrintInfo(...)", which relies on parser guesswork.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
53
54
# Extract date metadata from every section of a document.
#
# Does nothing unless the 'extract_historical_years' option is set on the
# object. Otherwise it starts at $doc_obj->get_top_section() and follows
# get_next_section() until that returns undef, passing each section's text
# to DateExtract::get_date_metadata together with the 'no_bibliography',
# 'maximum_year' and 'maximum_century' option values.
#
# Arguments:
#   $doc_obj - document object; must provide get_top_section, get_text and
#              get_next_section
#
# Returns nothing useful.
sub extract_date_metadata {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    return unless $self->{'extract_historical_years'};

    my $thissection = $doc_obj->get_top_section();
    while (defined $thissection) {
        my $text = $doc_obj->get_text($thissection);

        # Plain function call; the old "&name(...)" form is unnecessary and
        # bypasses any prototype on the callee.
        DateExtract::get_date_metadata($text, $doc_obj,
                                       $thissection,
                                       $self->{'no_bibliography'},
                                       $self->{'maximum_year'},
                                       $self->{'maximum_century'});

        $thissection = $doc_obj->get_next_section($thissection);
    }

    return;
}
75
76
771;
Note: See TracBrowser for help on using the repository browser.