Context Navigation

source: trunk/gsdl/perllib/plugins/ReferPlug.pm@ 3540

Last change on this file since 3540 was 3540, checked in by kjdon, 22 years ago
added John T's changes into CVS - added info to enable retrieval of usage info in xml
Property svn:keywords set to `Author Date Id Revision`
File size: 9.6 KB

Line
1	###########################################################################
2	#
3	# ReferPlug.pm - a plugin for bibliography records in Refer format
4	#
5	# A component of the Greenstone digital library software
6	# from the New Zealand Digital Library Project at the
7	# University of Waikato, New Zealand.
8	#
9	# Copyright 2000 Gordon W. Paynter
10	# Copyright 1999-2000 New Zealand Digital Library Project
11	#
12	# This program is free software; you can redistribute it and/or modify
13	# it under the terms of the GNU General Public License as published by
14	# the Free Software Foundation; either version 2 of the License, or
15	# (at your option) any later version.
16	#
17	# This program is distributed in the hope that it will be useful,
18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	# GNU General Public License for more details.
21	#
22	# You should have received a copy of the GNU General Public License
23	# along with this program; if not, write to the Free Software
24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25	#
26	###########################################################################
27
28	# ReferPlug reads bibliography files in Refer format.
29	#
30	# by Gordon W. Paynter ([email protected]), November 2000
31	#
32	# Loosely based on hcibib2Plug by Steve Jones ([email protected]).
33	# Which was based on EMAILPlug by Gordon Paynter ([email protected]).
34	# Which was based on old versions of HTMLplug and HCIBIBPlugby by Stefan
35	# Boddie and others -- it's hard to tell what came from where, now.
36	#
37	#
38	# ReferPlug creates a document object for every reference in the file.
39	# It is a subclass of SplitPlug, so if there are multiple records, all
40	# are read.
41	#
42	# Document text:
43	# The document text consists of the reference in Refer format
44	#
45	# Metadata:
46	# $Creator %A Author name
47	# $Title %T Title of article of book
48	# $Journal %J Title of Journal
49	# $Booktitle %B Title of book containing the publication
50	# $Report %R Type of Report, paper or thesis
51	# $Volume %V Volume Number of Journal
52	# $Number %N Number of Journal within Volume
53	# $Editor %E Editor name
54	# $Pages %P Page Number of article
55	# $Publisher %I Name of Publisher
56	# $Publisheraddr %C Publisher's address
57	# $Date %D Date of publication
58	# $Keywords %K Keywords associated with publication
59	# $Abstract %X Abstract of publication
60	# $Copyright %* Copyright information for the article
61	#
62
63	# 12/05/02 Added usage datastructure - John Thompson
64
65	package ReferPlug;
66
67	use SplitPlug;
68
69	# ReferPlug is a sub-class of BasPlug.
70	sub BEGIN {
71	@ISA = ('SplitPlug');
72	}
73
74	my $arguments = [ { 'name' => "process_exp",
75	'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
76	'type' => "string",
77	'deft' => q^(?i)\.bib$^,
78	'reqd' => "no" } ];
79
80	my $options = { 'name' => "ReferPlug",
81	'desc' => "ReferPlug reads bibliography files in Refer format.\nBy Gordon W. Paynter (gwp\@cs.waikato.ac.nz), November 2000\n\nLoosely based on hcibib2Plug by Steve Jones (stevej\@cs.waikato.ac.nz). Which was based on EMAILPlug by Gordon Paynter (gwp\@cs.waikato.ac.nz). Which was based on old versions of HTMLplug and HCIBIBPlugby by Stefan Boddie and others -- it's hard to tell what came from where, now.\n\nReferPlug creates a document object for every reference in the file. It is a subclass of SplitPlug, so if there are multiple records, all are read.\n\nDocument text:\n\tThe document text consists of the reference in Refer format.\nMetadata:\n\t\$Creator \%A Author name\n\t\$Title \%T Title of article of book\n\t\$Journal \%J Title of Journal\n\t\$Booktitle \%B Title of book containing the publication\n\t\$Report \%R Type of Report, paper or thesis\n\t\$Volume \%V Volume Number of Journal\n\t\$Number \%N Number of Journal within Volume\n\t\$Editor \%E Editor name\n\t\$Pages \%P Page Number of article\n\t\$Publisher \%I Name of Publisher\n\t\$Publisheraddr \%C Publisher's address\n\t\$Date \%D Date of publication\n\t\$Keywords \%K Keywords associated with publication\n\t\$Abstract \%X Abstract of publication\n\t\$Copyright\t\%* Copyright information for the article",
82	'inherits' => "yes",
83	'args' => $arguments };
84
85	# This plugin processes files with the suffix ".bib"
86	sub get_default_process_exp {
87	return q^(?i)\.bib$^;
88	}
89
90	# This plugin splits the input text at blank lines
91	sub get_default_split_exp {
92	return q^\n\s*\n^;
93	}
94
95	sub new {
96	my $class = shift (@_);
97	my $self = new SplitPlug ($class, @_);
98
99	# 14-05-02 To allow for proper inheritance of arguments - John Thompson
100	my $option_list = $self->{'option_list'};
101	push( @{$option_list}, $options );
102
103	return bless $self, $class;
104	}
105
106	# The process function reads a single bibliogrphic record and stores
107	# it as a new document.
108
109	sub process {
110	my $self = shift (@_);
111	my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
112	my $outhandle = $self->{'outhandle'};
113
114	# Check that we're dealing with a valid Refer file
115	return undef unless ($$textref =~ /^\s*%/);
116
117	# Report that we're processing the file
118	print $outhandle "ReferPlug: processing $file\n"
119	if ($self->{'verbosity'}) > 1;
120
121	my %field = ('H', 'Header',
122	'A', 'Creator',
123	'T', 'Title',
124	'J', 'Journal',
125	'B', 'Booktitle',
126	'R', 'Report',
127	'V', 'Volume',
128	'N', 'Number',
129	'E', 'Editor',
130	'P', 'Pages',
131	'I', 'Publisher',
132	'C', 'PublisherAddress',
133	'D', 'Date',
134	'O', 'OtherInformation',
135	'K', 'Keywords',
136	'X', 'Abstract',
137	'*', 'Copyright');
138
139	# Metadata fields
140	my %metadata;
141	my ($id, $Creator, $Keywords, $text);
142	my @lines = split(/\n+/, $$textref);
143
144
145	# Read and process each line in the bib file.
146	# Each file consists of a set of metadata items, one to each line
147	# with the Refer key followed by a space then the associated data
148	foreach my $line (@lines) {
149
150	# Add each line. Most lines consist of a field identifer and
151	# then data, and we simply store them, though we treat some
152	# of the fields a bit differently.
153
154	$line =~ s/\s+/ /g;
155	$text .= "$line\n";
156	$ReferFormat .= "$line\n";
157
158	next unless ($line =~ /^%[A-Z\*]/);
159	$id = substr($line,1,1);
160	$line =~ s/^%. //;
161
162	# Add individual authors in "Lastname, Firstname" format.
163	# (The full set of authors will be added below as "Creator".)
164	if ($id eq "A") {
165
166	# Reformat and add author name
167	my @words = split(/ /, $line);
168	my $lastname = pop @words;
169	my $firstname = join(" ", @words);
170	my $fullname = $lastname . ", " . $firstname;
171
172	# Add each name to set of Authors
173	if ($fullname =~ /\w/) {
174	$fullname = &text_into_html($fullname);
175	$doc_obj->add_metadata ($cursection, "Author", $fullname);
176	}
177	}
178
179	# Add individual keywords.
180	# (The full set of authors will be added below as "Keywords".)
181	if ($id eq "K") {
182	my @keywordlist = split(/,/, $line);
183	foreach my $k (@keywordlist) {
184	$k = lc($k);
185	$k =~ s/\s*$//;
186	$k =~ s/^\s*//;
187	if ($k =~ /\w/) {
188	$k = &text_into_html($k);
189	$doc_obj->add_metadata ($cursection, "Keyword", $k);
190	}
191	}
192	}
193
194	# Add this line of metadata
195	$metadata{$id} .= "$line\n";
196	}
197
198
199
200	# Add the various field as metadata
201	my ($f, $name, $value);
202	foreach $f (keys %metadata) {
203
204	next unless (defined $field{$f});
205	next unless (defined $metadata{$f});
206
207	$name = $field{$f};
208	$value = $metadata{$f};
209
210	# Add the various field as metadata
211
212	# The Creator metadata is found by concatenating authors.
213	if ($f eq "A") {
214
215	my @authorlist = split(/\n/, $value);
216	my $lastauthor = pop @authorlist;
217	my $Creator = "";
218	if (scalar @authorlist) {
219	$Creator = join(", ", @authorlist) . "and $lastauthor";
220	} else {
221	$Creator = $lastauthor;
222	}
223
224	if ($Creator =~ /\w/) {
225	$Creator = &text_into_html($Creator);
226	$doc_obj->add_metadata ($cursection, "Creator", $Creator);
227	}
228	}
229
230	# The rest are added in a standard way
231	else {
232	$value = &text_into_html($value);
233	$doc_obj->add_metadata ($cursection, $name, $value);
234	}
235
236	# Books and Journals are additionally marked for display purposes
237	if ($f eq "B") {
238	$doc_obj->add_metadata($cursection, "BookConfOnly", 1);
239	} elsif ($f eq "J") {
240	$doc_obj->add_metadata($cursection, "JournalsOnly", 1);
241	}
242
243
244	}
245
246	# Add the text in refer format(all fields)
247	if ($text =~ /\w/) {
248	$text = &text_into_html($text);
249	$doc_obj->add_text ($cursection, $text);
250	}
251
252	return 1; # processed the file
253	}
254
255	1;
256	#
257	# Convert a text string into HTML.
258	#
259	# The HTML is going to be inserted into a GML file, so
260	# we have to be careful not to use symbols like ">",
261	# which ocurs frequently in email messages (and use
262	# &gt instead.
263	#
264	# This function also turns links and email addresses into hyperlinks,
265	# and replaces carriage returns with <BR> tags (and multiple carriage
266	# returns with <P> tags).
267	#
268
269	sub text_into_html {
270	my ($text) = @_;
271
272
273	# Convert problem charaters into HTML symbols
274	$text =~ s/&/&/g;
275	$text =~ s/</</g;
276	$text =~ s/>/>/g;
277	$text =~ s/\"/"/g;
278	$text =~ s/\'/ /g;
279	$text =~ s/\+/ /g;
280	$text =~ s/\(/ /g;
281	$text =~ s/\)/ /g;
282
283	# convert email addresses and URLs into links
284	$text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
285	$text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;
286
287	# Clean up whitespace and convert \n charaters to <BR> or <P>
288	$text =~ s/ +/ /g;
289	$text =~ s/\s*$//;
290	$text =~ s/^\s*//;
291	$text =~ s/\n/\n<BR>/g;
292	$text =~ s/<BR>\s*<BR>/<P>/g;
293
294	return $text;
295	}
296
297

Note: See TracBrowser for help on using the repository browser.

Download in other formats: