source: main/trunk/greenstone2/perllib/plugins/ReferPlugin.pm@ 31492

Last change on this file since 31492 was 31492, checked in by kjdon, 7 years ago

renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes

  • Property svn:keywords set to Author Date Id Revision
File size: 8.5 KB
Line 
1###########################################################################
2#
3# ReferPlugin.pm - a plugin for bibliography records in Refer format
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2000 Gordon W. Paynter
10# Copyright 1999-2000 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# ReferPlugin reads bibliography files in Refer format.
29#
30# by Gordon W. Paynter ([email protected]), November 2000
31#
32# Loosely based on hcibib2Plug by Steve Jones ([email protected]).
33# Which was based on EMAILPlug by Gordon Paynter ([email protected]).
34# Which was based on old versions of HTMLplug and HCIBIBPlug by Stefan
35# Boddie and others -- it's hard to tell what came from where, now.
36#
37#
38# ReferPlugin creates a document object for every reference in the file.
39# It is a subclass of SplitTextFile, so if there are multiple records, all
40# are read.
41#
42# Document text:
43# The document text consists of the reference in Refer format
44#
45# Metadata:
46# $Creator %A Author name
47# $Title %T Title of article or book
48# $Journal %J Title of Journal
49# $Booktitle %B Title of book containing the publication
50# $Report %R Type of Report, paper or thesis
51# $Volume %V Volume Number of Journal
52# $Number %N Number of Journal within Volume
53# $Editor %E Editor name
54# $Pages %P Page Number of article
55# $Publisher %I Name of Publisher
56# $PublisherAddress %C Publisher's address
57# $Date %D Date of publication
58# $Keywords %K Keywords associated with publication
59# $Abstract %X Abstract of publication
60# $Copyright %* Copyright information for the article
61#
62
63package ReferPlugin;
64
65use SplitTextFile;
66use MetadataRead;
67use strict;
68no strict 'refs'; # allow filehandles to be variables and viceversa
69
# Set up inheritance at compile time: ReferPlugin derives from
# MetadataRead and SplitTextFile (searched in that order).
BEGIN {
    @ReferPlugin::ISA = qw(MetadataRead SplitTextFile);
}
74
# Argument descriptions for this plugin.  Each entry gives the argument
# name, a description key, its type, its default value and whether it
# is required.  The defaults come from the get_default_*_exp subs in
# this file.
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BaseImporter.process_exp}',
        'type' => 'regexp',
        'deft' => get_default_process_exp(),
        'reqd' => 'no',
    },
    {
        'name' => 'split_exp',
        'desc' => '{SplitTextFile.split_exp}',
        'type' => 'regexp',
        'reqd' => 'no',
        'deft' => get_default_split_exp(),
    },
];

# Plugin-level options; 'args' points at the argument list above.
my $options = {
    'name'     => 'ReferPlugin',
    'desc'     => '{ReferPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'explodes' => 'yes',
    'args'     => $arguments,
};
94
# Default pattern for the files this plugin handles: any name ending
# in ".bib", matched case-insensitively.
sub get_default_process_exp {
    my $bib_suffix_pattern = '(?i)\.bib$';
    return $bib_suffix_pattern;
}
99
# Default record separator: a blank line, i.e. two newlines with
# nothing but optional whitespace between them.
sub get_default_split_exp {
    my $blank_line_pattern = '\n\s*\n';
    return $blank_line_pattern;
}
104
# Constructor.
#
# Appends this class name to the caller's plugin list and this plugin's
# argument and option tables to the shared "ArgList"/"OptList" entries,
# then builds the object through the SplitTextFile constructor and
# re-blesses it into $class.
sub new {
    my $class = shift;
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push @{$pluginlist}, $class;
    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    my $self = SplitTextFile->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless($self, $class);
}
117
# The process function reads a single bibliographic record and stores
# it in the supplied document object.
#
# Arguments (after $self):
#   $textref - reference to the text of one Refer record
#   $doc_obj - document object that receives the metadata and text
#   $pluginfo, $base_dir, $file, $metadata, $gli - accepted for the
#              plugin calling convention but not used by this function
#
# Returns undef when the text does not start (after optional whitespace)
# with a "%" field marker, and 1 once the record has been stored.
#
# Only lines that begin with "%" followed by A-Z or "*" contribute
# metadata; any other line is kept in the document text but is not
# attached to a field.

sub process {
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    # Check that we're dealing with a valid Refer file
    return undef unless ($$textref =~ /^\s*%/);

    my $cursection = $doc_obj->get_top_section();

    # Refer field letter => metadata name
    my %field = (
        'H' => 'Header',
        'A' => 'Creator',
        'T' => 'Title',
        'J' => 'Journal',
        'B' => 'Booktitle',
        'R' => 'Report',
        'V' => 'Volume',
        'N' => 'Number',
        'E' => 'Editor',
        'P' => 'Pages',
        'I' => 'Publisher',
        'C' => 'PublisherAddress',
        'D' => 'Date',
        'O' => 'OtherInformation',
        'K' => 'Keywords',
        'X' => 'Abstract',
        '*' => 'Copyright',
    );

    # Raw field values collected per field letter, one value per line
    my %metadata;
    # The whole record, whitespace-normalised, used as the document text
    my $text = "";
    my @lines = split(/\n+/, $$textref);

    # Read and process each line of the record.
    # Each metadata item is expected on its own line: the Refer key,
    # a space, then the associated data.
    foreach my $line (@lines) {

        $line =~ s/\s+/ /g;
        $text .= "$line\n";

        next unless ($line =~ /^%[A-Z\*]/);
        my $id = substr($line, 1, 1);
        $line =~ s/^%. //;

        # Add individual authors in "Lastname, Firstname" format.
        # (The full set of authors will be added below as "Creator".)
        if ($id eq "A") {

            # The last word is taken to be the surname
            my @words = split(/ /, $line);
            my $lastname = pop @words;
            $lastname = "" unless (defined $lastname);
            my $firstname = join(" ", @words);

            # A single-word name (e.g. a corporate author) has no first
            # name part, so do not leave a dangling ", " after it.
            my $fullname = ($firstname =~ /\S/)
                ? $lastname . ", " . $firstname
                : $lastname;

            # Add each name to set of Authors
            if ($fullname =~ /\w/) {
                $fullname = text_into_html($fullname);
                $doc_obj->add_metadata ($cursection, "Author", $fullname);
            }
        }

        # Add individual keywords, lower-cased and trimmed.
        # (The full set of keywords will be added below as "Keywords".)
        if ($id eq "K") {
            my @keywordlist = split(/,/, $line);
            foreach my $k (@keywordlist) {
                $k = lc($k);
                $k =~ s/\s*$//;
                $k =~ s/^\s*//;
                if ($k =~ /\w/) {
                    $k = text_into_html($k);
                    $doc_obj->add_metadata ($cursection, "Keyword", $k);
                }
            }
        }

        # Remember this line of metadata; repeated fields accumulate,
        # separated by newlines
        $metadata{$id} .= "$line\n";
    }

    # Add the various fields as metadata
    foreach my $f (keys %metadata) {

        # Skip field letters we have no metadata name for
        next unless (defined $field{$f});

        my $name = $field{$f};
        my $value = $metadata{$f};

        if ($f eq "A") {
            # The Creator metadata is found by concatenating authors:
            # "A, B and C"
            my @authorlist = split(/\n/, $value);
            my $lastauthor = pop @authorlist;
            my $Creator = "";
            if (scalar @authorlist) {
                $Creator = join(", ", @authorlist) . " and $lastauthor";
            } else {
                $Creator = $lastauthor;
            }

            if ($Creator =~ /\w/) {
                $Creator = text_into_html($Creator);
                $doc_obj->add_metadata ($cursection, "Creator", $Creator);
            }
        }
        else {
            # The rest are added in a standard way
            $value = text_into_html($value);
            $doc_obj->add_metadata ($cursection, $name, $value);
        }

        # Books and Journals are additionally marked for display purposes
        if ($f eq "B") {
            $doc_obj->add_metadata($cursection, "BookConfOnly", 1);
        } elsif ($f eq "J") {
            $doc_obj->add_metadata($cursection, "JournalsOnly", 1);
        }
    }

    # Add the text in refer format (all fields)
    if ($text =~ /\w/) {
        $text = text_into_html($text);
        $doc_obj->add_text ($cursection, $text);
    }

    # Add FileFormat as the metadata
    $doc_obj->add_metadata($cursection, "FileFormat", "Refer");

    return 1; # processed the file
}
266
2671;
#
# Convert a text string into HTML.
#
# The HTML is going to be inserted into a GML file, so we have to be
# careful not to emit raw symbols like "&", "<", ">" and the double
# quote; they are replaced by &amp;, &lt;, &gt; and &quot;.  Single
# quotes, plus signs and parentheses are replaced by spaces.
#
# This function also turns email addresses and http/https URLs into
# hyperlinks, trims leading and trailing whitespace, and puts a <BR>
# tag after each newline (two <BR> tags separated only by whitespace
# are merged into a <P> tag).
#
# Takes the text as its only argument and returns the converted copy.
#

sub text_into_html {
    my ($text) = @_;

    # Convert problem characters into HTML symbols ("&" must go first
    # so the entities produced below are not escaped a second time)
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/\"/&quot;/g;

    # Single quotes, plus signs and parentheses each become a space
    $text =~ tr/'+()/ /;

    # Convert email addresses and URLs into links.  Both http and
    # https URLs are recognised.
    $text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
    $text =~ s/(https?:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;

    # Clean up whitespace and convert \n characters to <BR> or <P>
    $text =~ s/ +/ /g;
    $text =~ s/\s*$//;
    $text =~ s/^\s*//;
    $text =~ s/\n/\n<BR>/g;
    $text =~ s/<BR>\s*<BR>/<P>/g;

    return $text;
}
308
309
Note: See TracBrowser for help on using the repository browser.