Context Navigation

source: trunk/gsdl/perllib/plugins/BibTexPlug.pm@ 1903

Last change on this file since 1903 was 1711, checked in by say1, 24 years ago
fixed minor spelling mistake
Property svn:keywords set to `Author Date Id Revision`
File size: 7.3 KB

Line
1	###########################################################################
2	#
3	# BibTexPlug.pm - a plugin for bibliography records in BibTex format
4	#
5	# A component of the Greenstone digital library software
6	# from the New Zealand Digital Library Project at the
7	# University of Waikato, New Zealand.
8	#
9	# Copyright 2000 Gordon W. Paynter
10	# Copyright 1999-2000 New Zealand Digital Library Project
11	#
12	# This program is free software; you can redistribute it and/or modify
13	# it under the terms of the GNU General Public License as published by
14	# the Free Software Foundation; either version 2 of the License, or
15	# (at your option) any later version.
16	#
17	# This program is distributed in the hope that it will be useful,
18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	# GNU General Public License for more details.
21	#
22	# You should have received a copy of the GNU General Public License
23	# along with this program; if not, write to the Free Software
24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25	#
26	###########################################################################
27
28
29	# BibTexPlug reads bibliography files in BibTex format.
30	#
31	# by Gordon W. Paynter ([email protected]), November 2000
32	# Based on ReferPlug. See ReferPlug for geneology.
33	#
34	# BibTexPlug creates a document object for every reference a the file.
35	# It is a subclass of SplitPlug, so if there are multiple records, all
36	# are read.
37
38
39	package BibTexPlug;
40
41	use SplitPlug;
42
43
44	# BibTexPlug is a sub-class of BasPlug.
45	sub BEGIN {
46	@ISA = ('SplitPlug');
47	}
48
49	# This plugin processes files with the suffix ".bib"
50	sub get_default_process_exp {
51	return q^(?i)\.bib$^;
52	}
53
54	# This plugin splits the input text at blank lines
55	sub get_default_split_exp {
56	return q^\n+(?=@)^;
57	}
58
59
60	# The process function reads a single bibliographic record and stores
61	# it as a new document.
62
63	sub process {
64	my $self = shift (@_);
65	my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
66	my $outhandle = $self->{'outhandle'};
67
68	# Check that we're dealing with a valid BibTex record
69	return undef unless ($$textref =~ /^@\w+\{.*\}/s);
70
71	# Ignore things we can't use
72	return 0 if ($$textref =~ /^\@String/);
73
74	# Report that we're processing the file
75	print $outhandle "BibTexPlug: processing $file\n"
76	if ($self->{'verbosity'}) > 1;
77
78
79	# This hash translates BibTex field names into metadata names. The
80	# BibTex names are taken from the "Local Guide to Latex" Graeme
81	# McKinstry. Metadata names are consistabnt with ReferPlug.
82
83	my %field = (
84	'address', 'PublisherAddress',
85	'author', 'Creator',
86	'booktitle', 'Booktitle',
87	'chapter', 'Chapter',
88	'edition', 'Edition',
89	'editor', 'Editor',
90	'institution', 'Publisher',
91	'journal', 'Journal',
92	'month', 'Month',
93	'number', 'Number',
94	'pages', 'Pages',
95	'publisher', 'Publisher',
96	'school', 'Publisher',
97	'title', 'Title',
98	'volume', 'Volume',
99	'year', 'Date',
100
101	'keywords', 'Keywords',
102	'abstract', 'Abstract',
103	'copyright', 'Copyright');
104
105	# Metadata fields
106	my %metadata;
107	my ($EntryType, $EntryID, $Creator, $Keywords, $text);
108
109	# Make sure the text has exactly one entry per line
110	my $lines = $$textref;
111	$lines =~ s/,\s*\n/=====/g;
112	$lines =~ s/\s+/ /g;
113	$lines =~ s/\s=====\s/\n/g;
114	my @lines = split(/\n+/, $lines);
115
116	# Read and process each line in the bib file.
117	my ($id, $name, $value, $line);
118	foreach $line (@lines) {
119
120	# Add each line. Most lines consist of a field identifer and
121	# then data, and we simply store them, though we treat some
122	# of the fields a bit differently.
123
124	$line =~ s/\s+/ /g;
125	$text .= "$line\n";
126
127	# The first line is special, it contains the reference type and OID
128	if ($line =~ /\@(\w+)\{([\w\d]*),/) {
129	$EntryType = $1;
130	$EntryID = $2;
131	print "** $EntryType - $EntryID \n";
132	next;
133	}
134
135	# otherwise, parse the metadata out of this line
136	next unless ($line =~ /^\s(\w+)\s+=\s+(.)/);
137	$id = lc($1);
138	$value = $2;
139
140	# Add this line of metadata
141	$metadata{$id} .= "$value\n";
142	}
143
144	# Add the Entry type as metadata
145	$doc_obj->add_metadata ($cursection, "EntryType", $EntryType);
146
147	# Add the various field as metadata
148	foreach my $id (keys %metadata) {
149
150	next unless (defined $field{$id});
151	next unless (defined $metadata{$id});
152
153	$name = $field{$id};
154	$value = $metadata{$id};
155
156	# Get rid of silly Latex stuff
157	if ($value =~ /\"(.*)\"/) {
158	$value = $1;
159	}
160	if ($value =~ /\{(.*)\}/) {
161	$value = $1;
162	}
163
164	# Add the various field as metadata
165	$value = &text_into_html($value);
166	$doc_obj->add_metadata ($cursection, $name, $value);
167
168	# Several special operatons on metadata follow
169
170	# Add individual keywords.
171	# The full set of keywords will be added, in due course, as "Keywords".
172	# However, we also want to add them as individual "Keyword" metadata elements.
173	if ($id eq "keywords") {
174	my @keywordlist = split(/,/, $value);
175	foreach my $k (@keywordlist) {
176	$k = lc($k);
177	$k =~ s/\s*$//;
178	$k =~ s/^\s*//;
179	if ($k =~ /\w/) {
180	$k = &text_into_html($k);
181	$doc_obj->add_metadata ($cursection, "Keyword", $k);
182	}
183	}
184	}
185
186	# Add individual authors
187	# The author metadata will be stored as one "Creator" entry, but we
188	# also want to split it into several individual "Author" fields in
189	# "Lastename, Firstnames" format so we can browse it.
190	if ($id eq "author") {
191
192	my @authorlist = split(/(,\|and)/, $value);
193	foreach $a (@authorlist) {
194	$a =~ s/\s*$//;
195	$a =~ s/^\s*//;
196
197	# Reformat and add author name
198	my @words = split(/ /, $a);
199	my $lastname = pop @words;
200	my $firstname = join(" ", @words);
201	my $fullname = $lastname . ", " . $firstname;
202
203	# Add each name to set of Authors
204	if ($fullname =~ /\w+, \w+/) {
205	$fullname = &text_into_html($fullname);
206	$doc_obj->add_metadata ($cursection, "Author", $fullname);
207	}
208	}
209	}
210
211	# Books and Journals are additionally marked for display purposes
212	if ($id eq "booktitle") {
213	$doc_obj->add_metadata($cursection, "BookConfOnly", 1);
214	} elsif ($id eq "journal") {
215	$doc_obj->add_metadata($cursection, "JournalsOnly", 1);
216	}
217
218	}
219
220	# Add the text in BibTex format (all fields)
221	if ($text =~ /\w/) {
222	$text = &text_into_html($text);
223	$doc_obj->add_text ($cursection, $text);
224	}
225
226	return 1;
227	}
228
229	1;
230
231
232	# Convert a text string into HTML.
233
234	# The HTML is going to be inserted into a GML file, so we have to be
235	# careful not to use symbols like ">", which ocurs frequently in email
236	# messages (and use &gt instead.
237
238	# This function also turns URLs and email addresses into links, and
239	# replaces carriage returns with <BR> tags (and multiple carriage returns
240	# with <P> tags).
241
242
243	sub text_into_html {
244	my ($text) = @_;
245
246
247	# Convert problem charaters into HTML symbols
248	$text =~ s/&/&/g;
249	$text =~ s/</</g;
250	$text =~ s/>/>/g;
251	$text =~ s/\"/"/g;
252	$text =~ s/\'/ /g;
253	$text =~ s/\+/ /g;
254	$text =~ s/\(/ /g;
255	$text =~ s/\)/ /g;
256
257	# convert email addresses and URLs into links
258	$text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
259	$text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;
260
261	# Clean up whitespace and convert \n charaters to <BR> or <P>
262	$text =~ s/ +/ /g;
263	$text =~ s/\s*$//;
264	$text =~ s/^\s*//;
265	$text =~ s/\n/\n<BR>/g;
266	$text =~ s/<BR>\s*<BR>/<P>/g;
267
268	return $text;
269	}
270
271

Note: See TracBrowser for help on using the repository browser.

Download in other formats: