Context Navigation

source: main/tags/2.71/gsdl/perllib/plugins/ReferPlug.pm@ 21105

Last change on this file since 21105 was 12291, checked in by kjdon, 18 years ago
set explodes to yes, added a space before 'and' when concatenation authors
Property svn:keywords set to `Author Date Id Revision`
File size: 8.7 KB

Line
1	###########################################################################
2	#
3	# ReferPlug.pm - a plugin for bibliography records in Refer format
4	#
5	# A component of the Greenstone digital library software
6	# from the New Zealand Digital Library Project at the
7	# University of Waikato, New Zealand.
8	#
9	# Copyright 2000 Gordon W. Paynter
10	# Copyright 1999-2000 New Zealand Digital Library Project
11	#
12	# This program is free software; you can redistribute it and/or modify
13	# it under the terms of the GNU General Public License as published by
14	# the Free Software Foundation; either version 2 of the License, or
15	# (at your option) any later version.
16	#
17	# This program is distributed in the hope that it will be useful,
18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	# GNU General Public License for more details.
21	#
22	# You should have received a copy of the GNU General Public License
23	# along with this program; if not, write to the Free Software
24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25	#
26	###########################################################################
27
28	# ReferPlug reads bibliography files in Refer format.
29	#
30	# by Gordon W. Paynter ([email protected]), November 2000
31	#
32	# Loosely based on hcibib2Plug by Steve Jones ([email protected]).
33	# Which was based on EMAILPlug by Gordon Paynter ([email protected]).
34	# Which was based on old versions of HTMLplug and HCIBIBPlugby by Stefan
35	# Boddie and others -- it's hard to tell what came from where, now.
36	#
37	#
38	# ReferPlug creates a document object for every reference in the file.
39	# It is a subclass of SplitPlug, so if there are multiple records, all
40	# are read.
41	#
42	# Document text:
43	# The document text consists of the reference in Refer format
44	#
45	# Metadata:
46	# $Creator %A Author name
47	# $Title %T Title of article of book
48	# $Journal %J Title of Journal
49	# $Booktitle %B Title of book containing the publication
50	# $Report %R Type of Report, paper or thesis
51	# $Volume %V Volume Number of Journal
52	# $Number %N Number of Journal within Volume
53	# $Editor %E Editor name
54	# $Pages %P Page Number of article
55	# $Publisher %I Name of Publisher
56	# $Publisheraddr %C Publisher's address
57	# $Date %D Date of publication
58	# $Keywords %K Keywords associated with publication
59	# $Abstract %X Abstract of publication
60	# $Copyright %* Copyright information for the article
61	#
62
63	# 12/05/02 Added usage datastructure - John Thompson
64
65	package ReferPlug;
66
67	use SplitPlug;
68	use strict;
69	no strict 'refs'; # allow filehandles to be variables and viceversa
70
71	# ReferPlug is a sub-class of BasPlug.
72	sub BEGIN {
73	@ReferPlug::ISA = ('SplitPlug');
74	}
75
76	my $arguments =
77	[ { 'name' => "process_exp",
78	'desc' => "{BasPlug.process_exp}",
79	'type' => "regexp",
80	'deft' => &get_default_process_exp(),
81	'reqd' => "no" },
82	{ 'name' => "split_exp",
83	'desc' => "{SplitPlug.split_exp}",
84	'type' => "regexp",
85	'reqd' => "no",
86	'deft' => &get_default_split_exp() }
87	];
88
89	my $options = { 'name' => "ReferPlug",
90	'desc' => "{ReferPlug.desc}",
91	'abstract' => "no",
92	'inherits' => "yes",
93	'explodes' => "yes",
94	'args' => $arguments };
95
96	# This plugin processes files with the suffix ".bib"
97	sub get_default_process_exp {
98	return q^(?i)\.bib$^;
99	}
100
101	# This plugin splits the input text at blank lines
102	sub get_default_split_exp {
103	return q^\n\s*\n^;
104	}
105
106	sub new {
107	my ($class) = shift (@_);
108	my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
109	push(@$pluginlist, $class);
110
111	if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
112	if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
113
114	my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
115
116	return bless $self, $class;
117	}
118
119	# The process function reads a single bibliogrphic record and stores
120	# it as a new document.
121
122	sub process {
123	my $self = shift (@_);
124	my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
125	my $outhandle = $self->{'outhandle'};
126
127	# Check that we're dealing with a valid Refer file
128	return undef unless ($$textref =~ /^\s*%/);
129
130	my $cursection = $doc_obj->get_top_section();
131	# Report that we're processing the file
132	print STDERR "<Processing n='$file' p='ReferPlug'>\n" if ($gli);
133	print $outhandle "ReferPlug: processing $file\n"
134	if ($self->{'verbosity'}) > 1;
135
136	my %field = ('H', 'Header',
137	'A', 'Creator',
138	'T', 'Title',
139	'J', 'Journal',
140	'B', 'Booktitle',
141	'R', 'Report',
142	'V', 'Volume',
143	'N', 'Number',
144	'E', 'Editor',
145	'P', 'Pages',
146	'I', 'Publisher',
147	'C', 'PublisherAddress',
148	'D', 'Date',
149	'O', 'OtherInformation',
150	'K', 'Keywords',
151	'X', 'Abstract',
152	'*', 'Copyright');
153
154	# Metadata fields
155	my %metadata;
156	my ($id, $Creator, $Keywords, $text);
157	my @lines = split(/\n+/, $$textref);
158
159
160	# Read and process each line in the bib file.
161	# Each file consists of a set of metadata items, one to each line
162	# with the Refer key followed by a space then the associated data
163	foreach my $line (@lines) {
164
165	# Add each line. Most lines consist of a field identifer and
166	# then data, and we simply store them, though we treat some
167	# of the fields a bit differently.
168
169	$line =~ s/\s+/ /g;
170	$text .= "$line\n";
171	# $ReferFormat .= "$line\n"; # what is this???
172
173	next unless ($line =~ /^%[A-Z\*]/);
174	$id = substr($line,1,1);
175	$line =~ s/^%. //;
176
177	# Add individual authors in "Lastname, Firstname" format.
178	# (The full set of authors will be added below as "Creator".)
179	if ($id eq "A") {
180
181	# Reformat and add author name
182	my @words = split(/ /, $line);
183	my $lastname = pop @words;
184	my $firstname = join(" ", @words);
185	my $fullname = $lastname . ", " . $firstname;
186
187	# Add each name to set of Authors
188	if ($fullname =~ /\w/) {
189	$fullname = &text_into_html($fullname);
190	$doc_obj->add_metadata ($cursection, "Author", $fullname);
191	}
192	}
193
194	# Add individual keywords.
195	# (The full set of authors will be added below as "Keywords".)
196	if ($id eq "K") {
197	my @keywordlist = split(/,/, $line);
198	foreach my $k (@keywordlist) {
199	$k = lc($k);
200	$k =~ s/\s*$//;
201	$k =~ s/^\s*//;
202	if ($k =~ /\w/) {
203	$k = &text_into_html($k);
204	$doc_obj->add_metadata ($cursection, "Keyword", $k);
205	}
206	}
207	}
208
209	# Add this line of metadata
210	$metadata{$id} .= "$line\n";
211	}
212
213
214
215	# Add the various field as metadata
216	my ($f, $name, $value);
217	foreach $f (keys %metadata) {
218
219	next unless (defined $field{$f});
220	next unless (defined $metadata{$f});
221
222	$name = $field{$f};
223	$value = $metadata{$f};
224
225	# Add the various field as metadata
226
227	# The Creator metadata is found by concatenating authors.
228	if ($f eq "A") {
229
230	my @authorlist = split(/\n/, $value);
231	my $lastauthor = pop @authorlist;
232	my $Creator = "";
233	if (scalar @authorlist) {
234	$Creator = join(", ", @authorlist) . " and $lastauthor";
235	} else {
236	$Creator = $lastauthor;
237	}
238
239	if ($Creator =~ /\w/) {
240	$Creator = &text_into_html($Creator);
241	$doc_obj->add_metadata ($cursection, "Creator", $Creator);
242	}
243	}
244
245	# The rest are added in a standard way
246	else {
247	$value = &text_into_html($value);
248	$doc_obj->add_metadata ($cursection, $name, $value);
249	}
250
251	# Books and Journals are additionally marked for display purposes
252	if ($f eq "B") {
253	$doc_obj->add_metadata($cursection, "BookConfOnly", 1);
254	} elsif ($f eq "J") {
255	$doc_obj->add_metadata($cursection, "JournalsOnly", 1);
256	}
257
258
259	}
260
261	# Add the text in refer format(all fields)
262	if ($text =~ /\w/) {
263	$text = &text_into_html($text);
264	$doc_obj->add_text ($cursection, $text);
265	}
266	# Add FileFormat as the metadata
267	$doc_obj->add_metadata($cursection,"FileFormat","Refer");
268
269	return 1; # processed the file
270	}
271
272	1;
273	#
274	# Convert a text string into HTML.
275	#
276	# The HTML is going to be inserted into a GML file, so
277	# we have to be careful not to use symbols like ">",
278	# which ocurs frequently in email messages (and use
279	# &gt instead.
280	#
281	# This function also turns links and email addresses into hyperlinks,
282	# and replaces carriage returns with <BR> tags (and multiple carriage
283	# returns with <P> tags).
284	#
285
286	sub text_into_html {
287	my ($text) = @_;
288
289
290	# Convert problem charaters into HTML symbols
291	$text =~ s/&/&/g;
292	$text =~ s/</</g;
293	$text =~ s/>/>/g;
294	$text =~ s/\"/"/g;
295	$text =~ s/\'/ /g;
296	$text =~ s/\+/ /g;
297	$text =~ s/\(/ /g;
298	$text =~ s/\)/ /g;
299
300	# convert email addresses and URLs into links
301	$text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
302	$text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;
303
304	# Clean up whitespace and convert \n charaters to <BR> or <P>
305	$text =~ s/ +/ /g;
306	$text =~ s/\s*$//;
307	$text =~ s/^\s*//;
308	$text =~ s/\n/\n<BR>/g;
309	$text =~ s/<BR>\s*<BR>/<P>/g;
310
311	return $text;
312	}
313
314

Note: See TracBrowser for help on using the repository browser.

Download in other formats: