source: trunk/gsdl/perllib/plugins/ReferPlug.pm@ 10254

Last change on this file since 10254 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1###########################################################################
2#
3# ReferPlug.pm - a plugin for bibliography records in Refer format
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2000 Gordon W. Paynter
10# Copyright 1999-2000 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# ReferPlug reads bibliography files in Refer format.
29#
30# by Gordon W. Paynter ([email protected]), November 2000
31#
32# Loosely based on hcibib2Plug by Steve Jones ([email protected]).
33# Which was based on EMAILPlug by Gordon Paynter ([email protected]).
34# Which was based on old versions of HTMLplug and HCIBIBPlug by Stefan
35# Boddie and others -- it's hard to tell what came from where, now.
36#
37#
38# ReferPlug creates a document object for every reference in the file.
39# It is a subclass of SplitPlug, so if there are multiple records, all
40# are read.
41#
42# Document text:
43# The document text consists of the reference in Refer format
44#
45# Metadata:
46# $Creator %A Author name
47# $Title %T Title of article or book
48# $Journal %J Title of Journal
49# $Booktitle %B Title of book containing the publication
50# $Report %R Type of Report, paper or thesis
51# $Volume %V Volume Number of Journal
52# $Number %N Number of Journal within Volume
53# $Editor %E Editor name
54# $Pages %P Page Number of article
55# $Publisher %I Name of Publisher
56# $PublisherAddress %C Publisher's address
57# $Date %D Date of publication
58# $Keywords %K Keywords associated with publication
59# $Abstract %X Abstract of publication
60# $Copyright %* Copyright information for the article
61#
62
63# 12/05/02 Added usage datastructure - John Thompson
64
65package ReferPlug;
66
67use SplitPlug;
68use strict;
69no strict 'refs'; # allow filehandles to be variables and viceversa
70
# ReferPlug is a sub-class of SplitPlug.  The inheritance list is set
# in a BEGIN block so that it is in place at compile time.
BEGIN {
    @ReferPlug::ISA = qw(SplitPlug);
}
75
# Command-line style arguments understood by this plugin.  The default
# values are taken from the get_default_*_exp subs defined in this file.
my $arguments = [
    {
        name => "process_exp",
        desc => "{BasPlug.process_exp}",
        type => "regexp",
        deft => get_default_process_exp(),
        reqd => "no",
    },
    {
        name => "split_exp",
        desc => "{SplitPlug.split_exp}",
        type => "regexp",
        reqd => "no",
        deft => get_default_split_exp(),
    },
];

# Description record for the plugin itself; 'args' refers to the
# argument list above.
my $options = {
    name     => "ReferPlug",
    desc     => "{ReferPlug.desc}",
    abstract => "no",
    inherits => "yes",
    args     => $arguments,
};
94
# Default "process" expression: a case-insensitive pattern matching
# file names that end in ".bib".  Returned as a pattern string, not a
# compiled regex.
sub get_default_process_exp {
    my $bib_suffix_pattern = q^(?i)\.bib$^;
    return $bib_suffix_pattern;
}
99
# Default "split" expression: records are separated by a blank line
# (a newline, optional whitespace, and another newline).  Returned as
# a pattern string, not a compiled regex.
sub get_default_split_exp {
    my $blank_line_pattern = q^\n\s*\n^;
    return $blank_line_pattern;
}
104
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through to the SplitPlug constructor
#   $hashArgOptLists - hash ref; this plugin's argument list is appended
#                      under "ArgList" and its option record under "OptList"
#
# Returns the object built by SplitPlug, re-blessed into $class.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    # Register this plugin's own arguments and options.
    if (defined $arguments) {
        push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    }
    if (defined $options) {
        push(@{ $hashArgOptLists->{"OptList"} }, $options);
    }

    # NOTE(review): the pushes above autovivify $hashArgOptLists when the
    # caller did not supply one, so the two-argument branch below looks
    # unreachable -- confirm before relying on it.
    my $self;
    if (defined $hashArgOptLists) {
        $self = SplitPlug->new($pluginlist, $inputargs, $hashArgOptLists);
    }
    else {
        $self = SplitPlug->new($pluginlist, $inputargs);
    }

    return bless $self, $class;
}
117
# The process function reads a single bibliographic record and stores
# it in the supplied document object.
#
# Arguments (after $self):
#   $textref   - reference to a scalar holding the text of one record
#   $pluginfo  - not used by this sub
#   $base_dir  - not used by this sub
#   $file      - file name, used only in progress messages
#   $metadata  - not used by this sub
#   $doc_obj   - document object; must provide get_top_section,
#                add_metadata and add_text
#   $gli       - when true, a <Processing .../> line is printed to STDERR
#
# Returns undef when the text does not start (after optional whitespace)
# with a '%', i.e. it does not look like a Refer record; returns 1 once
# the record has been processed.

sub process {
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # Check that we're dealing with a valid Refer file.
    # ("return undef" rather than a bare return: kept as in the original
    # so that callers in any context still receive a single undef.)
    return undef unless ($$textref =~ /^\s*%/);

    my $cursection = $doc_obj->get_top_section();

    # Report that we're processing the file
    print STDERR "<Processing n='$file' p='ReferPlug'>\n" if ($gli);
    print $outhandle "ReferPlug: processing $file\n"
        if ($self->{'verbosity'} > 1);

    # Refer field letter => metadata name
    my %field = ('H', 'Header',
                 'A', 'Creator',
                 'T', 'Title',
                 'J', 'Journal',
                 'B', 'Booktitle',
                 'R', 'Report',
                 'V', 'Volume',
                 'N', 'Number',
                 'E', 'Editor',
                 'P', 'Pages',
                 'I', 'Publisher',
                 'C', 'PublisherAddress',
                 'D', 'Date',
                 'O', 'OtherInformation',
                 'K', 'Keywords',
                 'X', 'Abstract',
                 '*', 'Copyright');

    # Record the file format exactly once per record.  (Previously this
    # was done inside the line loop, which added one duplicate value for
    # every field line.)
    $doc_obj->add_metadata($cursection, "FileFormat", "Refer");

    # Field letter => newline-separated values collected for that field
    my %metadata;
    # The whole record, whitespace-normalised, one line per input line
    my $text = "";
    my @lines = split(/\n+/, $$textref);

    # Read and process each line in the bib file.
    # Each file consists of a set of metadata items, one to each line
    # with the Refer key followed by a space then the associated data
    foreach my $line (@lines) {

        # Add each line.  Most lines consist of a field identifier and
        # then data, and we simply store them, though we treat some
        # of the fields a bit differently.
        $line =~ s/\s+/ /g;
        $text .= "$line\n";

        # Lines that do not start with a field key only contribute to
        # the document text.
        next unless ($line =~ /^%[A-Z\*]/);
        my $id = substr($line, 1, 1);
        $line =~ s/^%. //;

        # Add individual authors in "Lastname, Firstname" format.
        # (The full set of authors will be added below as "Creator".)
        if ($id eq "A") {

            # The last word is taken to be the family name.
            my @words     = split(/ /, $line);
            my $lastname  = @words ? pop @words : "";
            my $firstname = join(" ", @words);

            # A single-word name is stored as is, without a dangling
            # ", " after it.
            my $fullname = ($firstname ne "")
                ? $lastname . ", " . $firstname
                : $lastname;

            if ($fullname =~ /\w/) {
                $fullname = text_into_html($fullname);
                $doc_obj->add_metadata ($cursection, "Author", $fullname);
            }
        }

        # Add individual keywords.
        # (The full set of keywords will be added below as "Keywords".)
        if ($id eq "K") {
            foreach my $k (split(/,/, $line)) {
                $k = lc($k);
                $k =~ s/\s*$//;
                $k =~ s/^\s*//;
                if ($k =~ /\w/) {
                    $k = text_into_html($k);
                    $doc_obj->add_metadata ($cursection, "Keyword", $k);
                }
            }
        }

        # Add this line of metadata
        $metadata{$id} .= "$line\n";
    }

    # Add the various fields as metadata.  The keys are sorted so that
    # the metadata is added in the same order on every run.
    foreach my $f (sort keys %metadata) {

        # Field letters without a known metadata name are ignored.
        next unless (defined $field{$f});

        my $name  = $field{$f};
        my $value = $metadata{$f};

        # The Creator metadata is found by concatenating authors:
        # "A", "A and B", "A, B and C", ...
        if ($f eq "A") {

            my @authorlist = split(/\n/, $value);
            my $lastauthor = pop @authorlist;
            my $creator;
            if (scalar @authorlist) {
                $creator = join(", ", @authorlist) . " and $lastauthor";
            } else {
                $creator = $lastauthor;
            }

            # $creator is undefined when every author line was empty.
            if (defined $creator && $creator =~ /\w/) {
                $creator = text_into_html($creator);
                $doc_obj->add_metadata ($cursection, "Creator", $creator);
            }
        }

        # The rest are added in a standard way
        else {
            $value = text_into_html($value);
            $doc_obj->add_metadata ($cursection, $name, $value);
        }

        # Books and Journals are additionally marked for display purposes
        if ($f eq "B") {
            $doc_obj->add_metadata($cursection, "BookConfOnly", 1);
        } elsif ($f eq "J") {
            $doc_obj->add_metadata($cursection, "JournalsOnly", 1);
        }
    }

    # Add the text in refer format (all fields)
    if ($text =~ /\w/) {
        $text = text_into_html($text);
        $doc_obj->add_text ($cursection, $text);
    }

    return 1; # processed the file
}
270
2711;
#
# Convert a text string into HTML.
#
# The HTML is going to be inserted into a GML file, so we have to be
# careful not to use symbols like ">", which occur frequently in email
# messages (we use "&gt;" instead).  Apostrophes, plus signs and
# parentheses are replaced with spaces.
#
# This function also turns email addresses and http/https URLs into
# hyperlinks, and puts a <BR> tag after each newline (two <BR> tags
# separated only by whitespace become a single <P> tag).
#
# Takes the text as its only argument and returns the converted string.
#

sub text_into_html {
    my ($text) = @_;

    # Convert problem characters into HTML symbols.  "&" must be done
    # first so that the entities produced below are not escaped again.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/\"/&quot;/g;
    $text =~ s/['+()]/ /g;

    # Convert email addresses and URLs into links.  Both http and https
    # URLs are recognised.
    $text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
    $text =~ s/(https?:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;

    # Clean up whitespace and convert \n characters to <BR> or <P>
    $text =~ s/ +/ /g;
    $text =~ s/\s*$//;
    $text =~ s/^\s*//;
    $text =~ s/\n/\n<BR>/g;
    $text =~ s/<BR>\s*<BR>/<P>/g;

    return $text;
}
312
313
Note: See TracBrowser for help on using the repository browser.