source: main/trunk/greenstone2/perllib/plugins/DateExtractor.pm@ 22597

Last change on this file since 22597 was 16025, checked in by kjdon, 16 years ago

added license info

  • Property svn:executable set to *
File size: 2.9 KB
Line 
1###########################################################################
2#
3# DateExtractor - helper plugin that extracts historical dates from text
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2008 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package DateExtractor;
28
29use DateExtract;
30use PrintInfo;
31use strict;
32
# Set up inheritance at compile time: DateExtractor is a subclass of
# PrintInfo, so method lookups that fail here fall through to PrintInfo.
BEGIN {
    @DateExtractor::ISA = qw(PrintInfo);
}
36
# Argument specifications contributed by this plugin.  Each entry is a hash
# describing one option; the 'desc' values are "{...}" placeholder keys, not
# literal help text.  new() appends these entries to the caller's "ArgList".
my $arguments = [

    # Flag: when set, extract_date_metadata() performs the extraction pass.
    {
        'name' => 'extract_historical_years',
        'desc' => '{DateExtractor.extract_historical_years}',
        'type' => 'flag',
        'reqd' => 'no',
    },

    # Integer option whose default is the current calendar year, computed
    # once when this statement runs.  Forwarded to
    # DateExtract::get_date_metadata by extract_date_metadata().
    {
        'name'        => 'maximum_year',
        'desc'        => '{DateExtractor.maximum_year}',
        'type'        => 'int',
        'deft'        => 1900 + (localtime)[5],
        'char_length' => '4',
        #'range' => "2,100",
        'reqd'        => 'no',
    },

    # String option defaulting to "-1"; forwarded to
    # DateExtract::get_date_metadata.
    # NOTE(review): "-1" presumably means "no limit" - confirm in DateExtract.
    {
        'name' => 'maximum_century',
        'desc' => '{DateExtractor.maximum_century}',
        'type' => 'string',
        'deft' => '-1',
        'reqd' => 'no',
    },

    # Flag forwarded to DateExtract::get_date_metadata.
    {
        'name' => 'no_bibliography',
        'desc' => '{DateExtractor.no_bibliography}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];
59
# Description record for this plugin.  It carries the argument
# specifications defined in $arguments under 'args'; new() appends this
# record to the caller's "OptList".
my $options = {
    'name'     => 'DateExtractor',
    'desc'     => '{DateExtractor.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
65
66
# Constructor.
#
# Parameters:
#   $class           - name of the class being instantiated.
#   $pluginlist      - array ref; $class is appended to it.
#   $inputargs       - passed through untouched to PrintInfo->new.
#   $hashArgOptLists - hash ref; this module's argument specs are appended
#                      to its "ArgList" entry and its option record to its
#                      "OptList" entry.
#
# Returns the object produced by PrintInfo->new, blessed into $class.
sub new {
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Explicit class-method call instead of the indirect object form
    # ("new PrintInfo(...)"), which is ambiguous to the parser and is
    # disabled under "use v5.36".  The trailing 1 is PrintInfo's fourth
    # constructor argument; its meaning is defined there.
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless $self, $class;
}
80
81
# Run date extraction over every section of a document.
#
# Parameters:
#   $self    - plugin object; its 'extract_historical_years',
#              'no_bibliography', 'maximum_year' and 'maximum_century'
#              entries are read.
#   $doc_obj - document object providing get_top_section(),
#              get_next_section() and get_text().
#
# Nothing is done unless 'extract_historical_years' is true.  Otherwise the
# sections are visited in order, starting at get_top_section() and following
# get_next_section() until it returns undef, and each section's text is
# handed to DateExtract::get_date_metadata together with the document, the
# section and the three remaining option values.
sub extract_date_metadata {
    my ($self, $doc_obj) = @_;

    if ($self->{'extract_historical_years'}) {
        for (
            my $section = $doc_obj->get_top_section();
            defined $section;
            $section = $doc_obj->get_next_section($section)
          )
        {
            my $section_text = $doc_obj->get_text($section);

            &DateExtract::get_date_metadata(
                $section_text,
                $doc_obj,
                $section,
                $self->{'no_bibliography'},
                $self->{'maximum_year'},
                $self->{'maximum_century'},
            );
        }
    }
}
102
103
1041;
Note: See TracBrowser for help on using the repository browser.