source: gsdl/trunk/perllib/plugins/ReferPlugin.pm@ 15872

Last change on this file since 15872 was 15872, checked in by kjdon, 16 years ago

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1###########################################################################
2#
3# ReferPlugin.pm - a plugin for bibliography records in Refer format
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2000 Gordon W. Paynter
10# Copyright 1999-2000 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# ReferPlugin reads bibliography files in Refer format.
29#
30# by Gordon W. Paynter ([email protected]), November 2000
31#
32# Loosely based on hcibib2Plug by Steve Jones ([email protected]).
33# Which was based on EMAILPlug by Gordon Paynter ([email protected]).
34# Which was based on old versions of HTMLplug and HCIBIBPlug by Stefan
35# Boddie and others -- it's hard to tell what came from where, now.
36#
37#
38# ReferPlugin creates a document object for every reference in the file.
39# It is a subclass of SplitTextFile, so if there are multiple records, all
40# are read.
41#
42# Document text:
43# The document text consists of the reference in Refer format
44#
45# Metadata:
46# $Creator %A Author name
47# $Title %T Title of article or book
48# $Journal %J Title of Journal
49# $Booktitle %B Title of book containing the publication
50# $Report %R Type of Report, paper or thesis
51# $Volume %V Volume Number of Journal
52# $Number %N Number of Journal within Volume
53# $Editor %E Editor name
54# $Pages %P Page Number of article
55# $Publisher %I Name of Publisher
56# $PublisherAddress %C Publisher's address
57# $Date %D Date of publication
58# $Keywords %K Keywords associated with publication
59# $Abstract %X Abstract of publication
60# $Copyright %* Copyright information for the article
61#
62
63package ReferPlugin;
64
65use SplitTextFile;
66use strict;
67no strict 'refs'; # allow filehandles to be variables and viceversa
68
# ReferPlugin is a sub-class of SplitTextFile; the inheritance chain is
# set up at compile time.
BEGIN {
    @ReferPlugin::ISA = qw(SplitTextFile);
}
73
# Plugin arguments contributed by ReferPlugin.  Both are optional regular
# expressions whose defaults come from the get_default_*_exp subs of
# this package.  The list is appended to the caller's "ArgList" in new().
my $arguments = [
    {
        name => 'process_exp',
        desc => '{BasePlugin.process_exp}',
        type => 'regexp',
        deft => get_default_process_exp(),
        reqd => 'no',
    },
    {
        name => 'split_exp',
        desc => '{SplitTextFile.split_exp}',
        type => 'regexp',
        reqd => 'no',
        deft => get_default_split_exp(),
    },
];
86
# Option block describing this plugin; it carries the argument list
# above and is appended to the caller's "OptList" in new().
my $options = {
    name     => 'ReferPlugin',
    desc     => '{ReferPlugin.desc}',
    abstract => 'no',
    inherits => 'yes',
    explodes => 'yes',
    args     => $arguments,
};
93
# Default process_exp: this plugin processes files whose name ends in
# ".bib" (matched case-insensitively).  Returns the pattern as a string.
sub get_default_process_exp {
    my $pattern = '(?i)\.bib$';
    return $pattern;
}
98
# Default split_exp: the input text is split into records at blank
# lines (a newline, optional whitespace, and another newline).
# Returns the pattern as a string.
sub get_default_split_exp {
    my $pattern = '\n\s*\n';
    return $pattern;
}
103
# Constructor.
#
# Arguments:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to SplitTextFile->new
#   $hashArgOptLists - hash ref; this plugin's arguments are appended to
#                      its "ArgList" entry and its option block to its
#                      "OptList" entry
#
# Returns the object built by SplitTextFile->new, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Use an explicit class-method call rather than the indirect object
    # form ("new SplitTextFile(...)"), whose parsing is ambiguous.
    my $self = SplitTextFile->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
116
# The process function reads a single bibliographic record and stores
# it as a new document.
#
# Arguments:
#   $textref  - reference to the text of one Refer record
#   $pluginfo, $base_dir, $metadata - accepted but not used here
#   $file     - file name, used only in the progress messages
#   $doc_obj  - document object that receives the metadata and text
#   $gli      - when true, a <Processing ...> line is printed to STDERR
#
# Returns undef when the text does not begin (after optional whitespace)
# with a '%' field marker, otherwise 1.
#
# Lines that do not start with a '%' field marker are kept in the
# document text but are not added to any metadata field.
sub process {
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # Check that we're dealing with a valid Refer file
    return undef unless ($$textref =~ /^\s*%/);

    my $cursection = $doc_obj->get_top_section();

    # Report that we're processing the file
    print STDERR "<Processing n='$file' p='ReferPlugin'>\n" if ($gli);
    print $outhandle "ReferPlugin: processing $file\n"
        if ($self->{'verbosity'} > 1);

    # Refer field letter => metadata name
    my %field = ('H', 'Header',
                 'A', 'Creator',
                 'T', 'Title',
                 'J', 'Journal',
                 'B', 'Booktitle',
                 'R', 'Report',
                 'V', 'Volume',
                 'N', 'Number',
                 'E', 'Editor',
                 'P', 'Pages',
                 'I', 'Publisher',
                 'C', 'PublisherAddress',
                 'D', 'Date',
                 'O', 'OtherInformation',
                 'K', 'Keywords',
                 'X', 'Abstract',
                 '*', 'Copyright');

    # Raw field values, keyed by Refer field letter; repeated fields are
    # accumulated one per line.
    my %metadata;
    my $text = "";

    # Read and process each line in the bib file.
    # Each file consists of a set of metadata items, one to each line
    # with the Refer key followed by a space then the associated data
    foreach my $line (split(/\n+/, $$textref)) {

        # Add each line.  Most lines consist of a field identifier and
        # then data, and we simply store them, though we treat some
        # of the fields a bit differently.
        $line =~ s/\s+/ /g;
        $text .= "$line\n";

        next unless ($line =~ /^%[A-Z\*]/);
        my $id = substr($line, 1, 1);
        $line =~ s/^%. //;

        # Add individual authors in "Lastname, Firstname" format.
        # (The full set of authors will be added below as "Creator".)
        if ($id eq "A") {

            # Reformat the author name: the last word is the surname
            my @words = split(/ /, $line);
            my $lastname = pop @words;
            $lastname = "" unless (defined $lastname);
            my $firstname = join(" ", @words);

            # A single-word name has no first name; do not leave a
            # dangling comma behind it.
            my $fullname = $lastname;
            $fullname .= ", " . $firstname if ($firstname =~ /\S/);

            # Add each name to set of Authors
            if ($fullname =~ /\w/) {
                $fullname = text_into_html($fullname);
                $doc_obj->add_metadata ($cursection, "Author", $fullname);
            }
        }

        # Add individual keywords.
        # (The full set of keywords will be added below as "Keywords".)
        if ($id eq "K") {
            foreach my $k (split(/,/, $line)) {
                $k = lc($k);
                $k =~ s/\s*$//;
                $k =~ s/^\s*//;
                if ($k =~ /\w/) {
                    $k = text_into_html($k);
                    $doc_obj->add_metadata ($cursection, "Keyword", $k);
                }
            }
        }

        # Add this line of metadata
        $metadata{$id} .= "$line\n";
    }

    # Add the various fields as metadata.  Keys are sorted so that the
    # order in which metadata is added is reproducible.
    foreach my $f (sort keys %metadata) {

        # Skip field letters we have no metadata name for
        next unless (defined $field{$f});

        my $name = $field{$f};
        my $value = $metadata{$f};

        # The Creator metadata is found by concatenating authors.
        if ($f eq "A") {

            my @authorlist = split(/\n/, $value);
            my $lastauthor = pop @authorlist;
            $lastauthor = "" unless (defined $lastauthor);

            my $Creator = $lastauthor;
            if (scalar @authorlist) {
                $Creator = join(", ", @authorlist) . " and $lastauthor";
            }

            if ($Creator =~ /\w/) {
                $Creator = text_into_html($Creator);
                $doc_obj->add_metadata ($cursection, "Creator", $Creator);
            }
        }

        # The rest are added in a standard way
        else {
            $value = text_into_html($value);
            $doc_obj->add_metadata ($cursection, $name, $value);
        }

        # Books and Journals are additionally marked for display purposes
        if ($f eq "B") {
            $doc_obj->add_metadata($cursection, "BookConfOnly", 1);
        } elsif ($f eq "J") {
            $doc_obj->add_metadata($cursection, "JournalsOnly", 1);
        }
    }

    # Add the text in refer format (all fields)
    if ($text =~ /\w/) {
        $text = text_into_html($text);
        $doc_obj->add_text ($cursection, $text);
    }

    # Add FileFormat as the metadata
    $doc_obj->add_metadata($cursection, "FileFormat", "Refer");

    return 1; # processed the file
}
269
2701;
#
# Convert a text string into HTML.
#
# The HTML is going to be inserted into a GML file, so
# we have to be careful not to use symbols like ">",
# which occurs frequently in email messages (and use
# "&gt;" instead).
#
# Besides escaping &, <, > and double quotes, single quotes, plus signs
# and parentheses are replaced by spaces.
#
# This function also turns http/https links and email addresses into
# hyperlinks, and replaces newlines with <BR> tags (and multiple
# newlines with <P> tags).
#
# Takes the text as its only argument and returns the converted string;
# an undefined argument yields the empty string.
#

sub text_into_html {
    my ($text) = @_;
    $text = "" unless (defined $text);

    # Convert problem characters into HTML symbols
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/\"/&quot;/g;
    $text =~ s/\'/ /g;
    $text =~ s/\+/ /g;
    $text =~ s/\(/ /g;
    $text =~ s/\)/ /g;

    # convert email addresses and URLs (http and https) into links
    $text =~ s{([\w.\-]+\@[\w.\-]+)}{<a href="mailto:$1">$1</a>}g;
    $text =~ s{(https?://[\w.\-]+[/\w.\-]*)}{<a href="$1">$1</a>}g;

    # Clean up whitespace and convert \n characters to <BR> or <P>
    $text =~ s/ +/ /g;
    $text =~ s/\s*$//;
    $text =~ s/^\s*//;
    $text =~ s/\n/\n<BR>/g;
    $text =~ s/<BR>\s*<BR>/<P>/g;

    return $text;
}
311
312
Note: See TracBrowser for help on using the repository browser.