source: trunk/gsdl/perllib/plugins/ReferPlug.pm@ 7243

Last change on this file since 7243 was 6408, checked in by jmt12, 20 years ago

Added two new attributes for script arguments. HiddenGLI controls whether the argument will be visible at all in GLI, while ModeGLI defines the lowest detail mode under which the argument will be visible (only really for import and buildcol). Also ensured that the scripts were reporting their correct default process expressions, and further refined argument types by adding the catagory regexp for any regular expression (which can then be hidden under lower detail modes in GLI)

  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1###########################################################################
2#
3# ReferPlug.pm - a plugin for bibliography records in Refer format
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2000 Gordon W. Paynter
10# Copyright 1999-2000 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# ReferPlug reads bibliography files in Refer format.
29#
30# by Gordon W. Paynter ([email protected]), November 2000
31#
32# Loosely based on hcibib2Plug by Steve Jones ([email protected]).
33# Which was based on EMAILPlug by Gordon Paynter ([email protected]).
34# Which was based on old versions of HTMLplug and HCIBIBPlugby by Stefan
35# Boddie and others -- it's hard to tell what came from where, now.
36#
37#
38# ReferPlug creates a document object for every reference in the file.
39# It is a subclass of SplitPlug, so if there are multiple records, all
40# are read.
41#
42# Document text:
43# The document text consists of the reference in Refer format
44#
45# Metadata:
46# $Creator %A Author name
47# $Title %T Title of article of book
48# $Journal %J Title of Journal
49# $Booktitle %B Title of book containing the publication
50# $Report %R Type of Report, paper or thesis
51# $Volume %V Volume Number of Journal
52# $Number %N Number of Journal within Volume
53# $Editor %E Editor name
54# $Pages %P Page Number of article
55# $Publisher %I Name of Publisher
56# $Publisheraddr %C Publisher's address
57# $Date %D Date of publication
58# $Keywords %K Keywords associated with publication
59# $Abstract %X Abstract of publication
60# $Copyright %* Copyright information for the article
61#
62
63# 12/05/02 Added usage datastructure - John Thompson
64
65package ReferPlug;
66
67use SplitPlug;
68
69# ReferPlug is a sub-class of BasPlug.
70sub BEGIN {
71 @ISA = ('SplitPlug');
72}
73
74my $arguments =
75 [ { 'name' => "process_exp",
76 'desc' => "{BasPlug.process_exp}",
77 'type' => "regexp",
78 'deft' => &get_default_process_exp(),
79 'reqd' => "no" },
80 { 'name' => "split_exp",
81 'desc' => "{SplitPlug.split_exp}",
82 'type' => "regexp",
83 'reqd' => "no",
84 'deft' => &get_default_split_exp() }
85 ];
86
87my $options = { 'name' => "ReferPlug",
88 'desc' => "{ReferPlug.desc}",
89 'abstract' => "no",
90 'inherits' => "yes",
91 'args' => $arguments };
92
93# This plugin processes files with the suffix ".bib"
94sub get_default_process_exp {
95 return q^(?i)\.bib$^;
96}
97
98# This plugin splits the input text at blank lines
99sub get_default_split_exp {
100 return q^\n\s*\n^;
101}
102
103sub new {
104 my $class = shift (@_);
105 my $self = new SplitPlug ($class, @_);
106 $self->{'plugin_type'} = "ReferPlug";
107 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
108 my $option_list = $self->{'option_list'};
109 push( @{$option_list}, $options );
110
111 return bless $self, $class;
112}
113
114# The process function reads a single bibliogrphic record and stores
115# it as a new document.
116
117sub process {
118 my $self = shift (@_);
119 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
120 my $outhandle = $self->{'outhandle'};
121
122 # Check that we're dealing with a valid Refer file
123 return undef unless ($$textref =~ /^\s*%/);
124
125 # Report that we're processing the file
126 print STDERR "<Processing n='$file' p='ReferPlug'>\n" if ($gli);
127 print $outhandle "ReferPlug: processing $file\n"
128 if ($self->{'verbosity'}) > 1;
129
130 my %field = ('H', 'Header',
131 'A', 'Creator',
132 'T', 'Title',
133 'J', 'Journal',
134 'B', 'Booktitle',
135 'R', 'Report',
136 'V', 'Volume',
137 'N', 'Number',
138 'E', 'Editor',
139 'P', 'Pages',
140 'I', 'Publisher',
141 'C', 'PublisherAddress',
142 'D', 'Date',
143 'O', 'OtherInformation',
144 'K', 'Keywords',
145 'X', 'Abstract',
146 '*', 'Copyright');
147
148 # Metadata fields
149 my %metadata;
150 my ($id, $Creator, $Keywords, $text);
151 my @lines = split(/\n+/, $$textref);
152
153
154 # Read and process each line in the bib file.
155 # Each file consists of a set of metadata items, one to each line
156 # with the Refer key followed by a space then the associated data
157 foreach my $line (@lines) {
158
159 # Add each line. Most lines consist of a field identifer and
160 # then data, and we simply store them, though we treat some
161 # of the fields a bit differently.
162
163 $line =~ s/\s+/ /g;
164 $text .= "$line\n";
165 $ReferFormat .= "$line\n";
166
167 next unless ($line =~ /^%[A-Z\*]/);
168 $id = substr($line,1,1);
169 $line =~ s/^%. //;
170
171 # Add individual authors in "Lastname, Firstname" format.
172 # (The full set of authors will be added below as "Creator".)
173 if ($id eq "A") {
174
175 # Reformat and add author name
176 my @words = split(/ /, $line);
177 my $lastname = pop @words;
178 my $firstname = join(" ", @words);
179 my $fullname = $lastname . ", " . $firstname;
180
181 # Add each name to set of Authors
182 if ($fullname =~ /\w/) {
183 $fullname = &text_into_html($fullname);
184 $doc_obj->add_metadata ($cursection, "Author", $fullname);
185 }
186 }
187
188 # Add individual keywords.
189 # (The full set of authors will be added below as "Keywords".)
190 if ($id eq "K") {
191 my @keywordlist = split(/,/, $line);
192 foreach my $k (@keywordlist) {
193 $k = lc($k);
194 $k =~ s/\s*$//;
195 $k =~ s/^\s*//;
196 if ($k =~ /\w/) {
197 $k = &text_into_html($k);
198 $doc_obj->add_metadata ($cursection, "Keyword", $k);
199 }
200 }
201 }
202
203 # Add this line of metadata
204 $metadata{$id} .= "$line\n";
205 }
206
207
208
209 # Add the various field as metadata
210 my ($f, $name, $value);
211 foreach $f (keys %metadata) {
212
213 next unless (defined $field{$f});
214 next unless (defined $metadata{$f});
215
216 $name = $field{$f};
217 $value = $metadata{$f};
218
219 # Add the various field as metadata
220
221 # The Creator metadata is found by concatenating authors.
222 if ($f eq "A") {
223
224 my @authorlist = split(/\n/, $value);
225 my $lastauthor = pop @authorlist;
226 my $Creator = "";
227 if (scalar @authorlist) {
228 $Creator = join(", ", @authorlist) . "and $lastauthor";
229 } else {
230 $Creator = $lastauthor;
231 }
232
233 if ($Creator =~ /\w/) {
234 $Creator = &text_into_html($Creator);
235 $doc_obj->add_metadata ($cursection, "Creator", $Creator);
236 }
237 }
238
239 # The rest are added in a standard way
240 else {
241 $value = &text_into_html($value);
242 $doc_obj->add_metadata ($cursection, $name, $value);
243 }
244
245 # Books and Journals are additionally marked for display purposes
246 if ($f eq "B") {
247 $doc_obj->add_metadata($cursection, "BookConfOnly", 1);
248 } elsif ($f eq "J") {
249 $doc_obj->add_metadata($cursection, "JournalsOnly", 1);
250 }
251
252
253 }
254
255 # Add the text in refer format(all fields)
256 if ($text =~ /\w/) {
257 $text = &text_into_html($text);
258 $doc_obj->add_text ($cursection, $text);
259 }
260
261 return 1; # processed the file
262}
263
2641;
265#
266# Convert a text string into HTML.
267#
268# The HTML is going to be inserted into a GML file, so
269# we have to be careful not to use symbols like ">",
270# which ocurs frequently in email messages (and use
271# &gt instead.
272#
273# This function also turns links and email addresses into hyperlinks,
274# and replaces carriage returns with <BR> tags (and multiple carriage
275# returns with <P> tags).
276#
277
278sub text_into_html {
279 my ($text) = @_;
280
281
282 # Convert problem charaters into HTML symbols
283 $text =~ s/&/&amp;/g;
284 $text =~ s/</&lt;/g;
285 $text =~ s/>/&gt;/g;
286 $text =~ s/\"/&quot;/g;
287 $text =~ s/\'/ /g;
288 $text =~ s/\+/ /g;
289 $text =~ s/\(/ /g;
290 $text =~ s/\)/ /g;
291
292 # convert email addresses and URLs into links
293 $text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
294 $text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;
295
296 # Clean up whitespace and convert \n charaters to <BR> or <P>
297 $text =~ s/ +/ /g;
298 $text =~ s/\s*$//;
299 $text =~ s/^\s*//;
300 $text =~ s/\n/\n<BR>/g;
301 $text =~ s/<BR>\s*<BR>/<P>/g;
302
303 return $text;
304}
305
306
Note: See TracBrowser for help on using the repository browser.