Context Navigation

source: trunk/gsdl/perllib/plugins/ReferPlug.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago
Jeffrey's new parsing modifications, committed approx 6 July, 15.16
Property svn:keywords set to `Author Date Id Revision`
File size: 8.6 KB

Line
1	###########################################################################
2	#
3	# ReferPlug.pm - a plugin for bibliography records in Refer format
4	#
5	# A component of the Greenstone digital library software
6	# from the New Zealand Digital Library Project at the
7	# University of Waikato, New Zealand.
8	#
9	# Copyright 2000 Gordon W. Paynter
10	# Copyright 1999-2000 New Zealand Digital Library Project
11	#
12	# This program is free software; you can redistribute it and/or modify
13	# it under the terms of the GNU General Public License as published by
14	# the Free Software Foundation; either version 2 of the License, or
15	# (at your option) any later version.
16	#
17	# This program is distributed in the hope that it will be useful,
18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	# GNU General Public License for more details.
21	#
22	# You should have received a copy of the GNU General Public License
23	# along with this program; if not, write to the Free Software
24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25	#
26	###########################################################################
27
28	# ReferPlug reads bibliography files in Refer format.
29	#
30	# by Gordon W. Paynter ([email protected]), November 2000
31	#
32	# Loosely based on hcibib2Plug by Steve Jones ([email protected]).
33	# Which was based on EMAILPlug by Gordon Paynter ([email protected]).
34	# Which was based on old versions of HTMLPlug and HCIBIBPlug by Stefan
35	# Boddie and others -- it's hard to tell what came from where, now.
36	#
37	#
38	# ReferPlug creates a document object for every reference in the file.
39	# It is a subclass of SplitPlug, so if there are multiple records, all
40	# are read.
41	#
42	# Document text:
43	# The document text consists of the reference in Refer format
44	#
45	# Metadata:
46	# $Creator %A Author name
47	# $Title %T Title of article or book
48	# $Journal %J Title of Journal
49	# $Booktitle %B Title of book containing the publication
50	# $Report %R Type of Report, paper or thesis
51	# $Volume %V Volume Number of Journal
52	# $Number %N Number of Journal within Volume
53	# $Editor %E Editor name
54	# $Pages %P Page Number of article
55	# $Publisher %I Name of Publisher
56	# $PublisherAddress %C Publisher's address
57	# $Date %D Date of publication
58	# $Keywords %K Keywords associated with publication
59	# $Abstract %X Abstract of publication
60	# $Copyright %* Copyright information for the article
61	#
62
63	# 12/05/02 Added usage datastructure - John Thompson
64
65	package ReferPlug;
66
67	use SplitPlug;
68
69	# ReferPlug is a sub-class of SplitPlug.
BEGIN {
    # Set up inheritance at compile time: SplitPlug is the only parent.
    @ISA = qw(SplitPlug);
}
73
# Descriptions of the command-line arguments this plugin understands.
# Each default value comes from the matching get_default_* function
# defined further down in this file.
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "{BasPlug.process_exp}",
        'type' => "regexp",
        'deft' => get_default_process_exp(),
        'reqd' => "no",
    },
    {
        'name' => "split_exp",
        'desc' => "{SplitPlug.split_exp}",
        'type' => "regexp",
        'reqd' => "no",
        'deft' => get_default_split_exp(),
    },
];

# Plugin description record; it carries the argument list above.
my $options = {
    'name'     => "ReferPlug",
    'desc'     => "{ReferPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
92
# Default file filter: this plugin handles files whose name ends in
# ".bib", compared case-insensitively.
sub get_default_process_exp {
    my $bib_suffix_pattern = '(?i)\.bib$';
    return $bib_suffix_pattern;
}
97
# Default record separator: the input text is split wherever a blank
# (empty or whitespace-only) line occurs.
sub get_default_split_exp {
    my $blank_line_pattern = '\n\s*\n';
    return $blank_line_pattern;
}
102
# Constructor.
#
# Arguments:
#   $class           - the class being constructed
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through to the SplitPlug constructor
#   $hashArgOptLists - optional hash ref; this plugin's argument list is
#                      appended under "ArgList" and its option record
#                      under "OptList"
#
# Returns the object built by SplitPlug's constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # Note: these pushes autovivify $hashArgOptLists when the caller did
    # not supply one, so the "defined" test below is then always true.
    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Use an explicit class-method call rather than indirect object
    # syntax ("new SplitPlug(...)"), which relies on parser heuristics
    # and is fragile inside a package that defines its own new().
    my $self = (defined $hashArgOptLists)
        ? SplitPlug->new($pluginlist, $inputargs, $hashArgOptLists)
        : SplitPlug->new($pluginlist, $inputargs);

    return bless $self, $class;
}
115
# The process function reads a single bibliographic record and stores
# it as a new document.
#
# Arguments (after $self):
#   $textref   - reference to the text of one Refer record
#   $pluginfo, $base_dir, $metadata - accepted but not used here
#   $file      - file name, used only in progress messages
#   $doc_obj   - document object that receives the metadata and text
#   $gli       - if true, a progress line is also written to STDERR
#
# Returns undef if the text does not start with a "%" field, otherwise 1.
#
# Only lines that begin with "%" and an upper-case letter (or "*") are
# stored as metadata; every line, including any others, still goes into
# the document text.

sub process {
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # Check that we're dealing with a valid Refer file
    return undef unless ($$textref =~ /^\s*%/);

    # Report that we're processing the file
    print STDERR "<Processing n='$file' p='ReferPlug'>\n" if ($gli);
    print $outhandle "ReferPlug: processing $file\n"
        if ($self->{'verbosity'}) > 1;

    # Everything is attached to the document's top-level section.
    # (This used to be an undeclared package global that was never set.)
    my $cursection = $doc_obj->get_top_section();

    # Refer field key => metadata element name
    my %field = ('H', 'Header',
                 'A', 'Creator',
                 'T', 'Title',
                 'J', 'Journal',
                 'B', 'Booktitle',
                 'R', 'Report',
                 'V', 'Volume',
                 'N', 'Number',
                 'E', 'Editor',
                 'P', 'Pages',
                 'I', 'Publisher',
                 'C', 'PublisherAddress',
                 'D', 'Date',
                 'O', 'OtherInformation',
                 'K', 'Keywords',
                 'X', 'Abstract',
                 '*', 'Copyright');

    # Raw field values keyed by Refer field letter; repeated fields are
    # accumulated one value per line.
    my %metadata;
    my $text = "";
    my @lines = split(/\n+/, $$textref);

    # Read and process each line in the bib file.
    # Each file consists of a set of metadata items, one to each line
    # with the Refer key followed by a space then the associated data
    foreach my $line (@lines) {

        # Normalise whitespace and keep every line for the document text.
        $line =~ s/\s+/ /g;
        $text .= "$line\n";

        next unless ($line =~ /^%[A-Z\*]/);
        my $id = substr($line, 1, 1);

        # Strip the field tag; the separating space is optional so that a
        # bare "%X" line yields an empty value rather than the tag itself.
        $line =~ s/^%. ?//;

        # Add individual authors in "Lastname, Firstname" format.
        # (The full set of authors will be added below as "Creator".)
        if ($id eq "A") {
            my @words = split(/ /, $line);
            my $lastname = pop @words;
            my $firstname = join(" ", @words);

            # A single-word name has no first name: avoid a dangling ", ".
            my $fullname = ($firstname =~ /\S/)
                ? $lastname . ", " . $firstname
                : $lastname;

            if (defined $fullname && $fullname =~ /\w/) {
                $fullname = text_into_html($fullname);
                $doc_obj->add_metadata ($cursection, "Author", $fullname);
            }
        }

        # Add individual keywords.
        # (The full set of keywords will be added below as "Keywords".)
        if ($id eq "K") {
            foreach my $k (split(/,/, $line)) {
                $k = lc($k);
                $k =~ s/\s*$//;
                $k =~ s/^\s*//;
                if ($k =~ /\w/) {
                    $k = text_into_html($k);
                    $doc_obj->add_metadata ($cursection, "Keyword", $k);
                }
            }
        }

        # Add this line of metadata
        $metadata{$id} .= "$line\n";
    }

    # Record the file format once per record (it used to be added again
    # for every field line), and only if at least one field was found.
    $doc_obj->add_metadata($cursection, "FileFormat", "Refer") if (%metadata);

    # Add the various fields as metadata, in a stable order.
    foreach my $f (sort keys %metadata) {

        next unless (defined $field{$f});

        my $name = $field{$f};
        my $value = $metadata{$f};

        # The Creator metadata is found by concatenating authors.
        if ($f eq "A") {
            my @authorlist = grep { /\S/ } split(/\n/, $value);
            my $lastauthor = pop @authorlist;
            my $Creator = $lastauthor;
            if (scalar @authorlist) {
                $Creator = join(", ", @authorlist) . " and $lastauthor";
            }

            if (defined $Creator && $Creator =~ /\w/) {
                $Creator = text_into_html($Creator);
                $doc_obj->add_metadata ($cursection, "Creator", $Creator);
            }
        }

        # The rest are added in a standard way
        else {
            $value = text_into_html($value);
            $doc_obj->add_metadata ($cursection, $name, $value);
        }

        # Books and Journals are additionally marked for display purposes
        if ($f eq "B") {
            $doc_obj->add_metadata($cursection, "BookConfOnly", 1);
        } elsif ($f eq "J") {
            $doc_obj->add_metadata($cursection, "JournalsOnly", 1);
        }
    }

    # Add the text in refer format (all fields)
    if ($text =~ /\w/) {
        $text = text_into_html($text);
        $doc_obj->add_text ($cursection, $text);
    }

    return 1; # processed the file
}
267
268	1;
#
# Convert a text string into HTML.
#
# The HTML is going to be inserted into a GML file, so the characters
# "&", "<", ">" and the double quote are replaced by the entities
# &amp;, &lt;, &gt; and &quot;.  Single quotes, plus signs and
# parentheses are replaced by spaces.
#
# This function also turns email addresses and http/https URLs into
# hyperlinks, and replaces newlines with <BR> tags (and runs of <BR>
# tags separated only by whitespace with <P> tags).
#
# Takes the text as its only argument and returns the converted string.
#

sub text_into_html {
    my ($text) = @_;

    # Convert problem characters into HTML symbols.  "&" has to be
    # handled first so the entities produced below are not escaped again.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/\"/&quot;/g;
    $text =~ s/\'/ /g;
    $text =~ s/\+/ /g;
    $text =~ s/\(/ /g;
    $text =~ s/\)/ /g;

    # convert email addresses and URLs into links
    $text =~ s/([\w\d\.\-]+\@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
    $text =~ s/(https?:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1\">$1<\/a>/g;

    # Clean up whitespace and convert \n characters to <BR> or <P>
    $text =~ s/ +/ /g;
    $text =~ s/\s*$//;
    $text =~ s/^\s*//;
    $text =~ s/\n/\n<BR>/g;
    $text =~ s/<BR>\s*<BR>/<P>/g;

    return $text;
}
309
310

Note: See TracBrowser for help on using the repository browser.

Download in other formats: