Context Navigation

ReferPlugin.pm@ 32584

Last change on this file since 32584 was 31492, checked in by kjdon, 7 years ago
renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes
Property svn:keywords set to `Author Date Id Revision`
File size: 8.5 KB

Rev	Line
[1676]	1	###########################################################################
	2	#
[15872]	3	# ReferPlugin.pm - a plugin for bibliography records in Refer format
[1676]	4	#
	5	# A component of the Greenstone digital library software
	6	# from the New Zealand Digital Library Project at the
	7	# University of Waikato, New Zealand.
	8	#
	9	# Copyright 2000 Gordon W. Paynter
	10	# Copyright 1999-2000 New Zealand Digital Library Project
	11	#
	12	# This program is free software; you can redistribute it and/or modify
	13	# it under the terms of the GNU General Public License as published by
	14	# the Free Software Foundation; either version 2 of the License, or
	15	# (at your option) any later version.
	16	#
	17	# This program is distributed in the hope that it will be useful,
	18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	# GNU General Public License for more details.
	21	#
	22	# You should have received a copy of the GNU General Public License
	23	# along with this program; if not, write to the Free Software
	24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	25	#
	26	###########################################################################
	27
[15872]	28	# ReferPlugin reads bibliography files in Refer format.
[1676]	29	#
	30	# by Gordon W. Paynter ([email protected]), November 2000
	31	#
	32	# Loosely based on hcibib2Plug by Steve Jones ([email protected]).
	33	# Which was based on EMAILPlug by Gordon Paynter ([email protected]).
	34	# Which was based on old versions of HTMLplug and HCIBIBPlug by Stefan
	35	# Boddie and others -- it's hard to tell what came from where, now.
	36	#
	37	#
[15872]	38	# ReferPlugin creates a document object for every reference in the file.
	39	# It is a subclass of SplitTextFile, so if there are multiple records, all
[1676]	40	# are read.
	41	#
	42	# Document text:
	43	# The document text consists of the reference in Refer format
	44	#
	45	# Metadata:
	46	# $Creator %A Author name
	47	# $Title %T Title of article or book
	48	# $Journal %J Title of Journal
	49	# $Booktitle %B Title of book containing the publication
	50	# $Report %R Type of Report, paper or thesis
	51	# $Volume %V Volume Number of Journal
	52	# $Number %N Number of Journal within Volume
	53	# $Editor %E Editor name
	54	# $Pages %P Page Number of article
	55	# $Publisher %I Name of Publisher
	56	# $PublisherAddress %C Publisher's address
	57	# $Date %D Date of publication
	58	# $Keywords %K Keywords associated with publication
	59	# $Abstract %X Abstract of publication
	60	# $Copyright %* Copyright information for the article
	61	#
	62
[15872]	63	package ReferPlugin;
[1676]	64
[15872]	65	use SplitTextFile;
[24548]	66	use MetadataRead;
[10254]	67	use strict;
	68	no strict 'refs'; # allow filehandles to be variables and viceversa
[1676]	69
# ReferPlugin inherits directly from MetadataRead and SplitTextFile.
# The inheritance list is filled in inside a BEGIN block, i.e. at compile
# time, before any of the code below is run.
BEGIN {
    @ReferPlugin::ISA = qw(MetadataRead SplitTextFile);
}
	74
# Arguments specific to this plugin. The default values come from the
# get_default_*_exp functions defined further down in this file.
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BaseImporter.process_exp}',
        'type' => 'regexp',
        'deft' => get_default_process_exp(),
        'reqd' => 'no',
    },
    {
        'name' => 'split_exp',
        'desc' => '{SplitTextFile.split_exp}',
        'type' => 'regexp',
        'reqd' => 'no',
        'deft' => get_default_split_exp(),
    },
];

# Option block describing this plugin; it is handed to the parent
# constructor (via the OptList) by new().
my $options = {
    'name'     => 'ReferPlugin',
    'desc'     => '{ReferPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'explodes' => 'yes',
    'args'     => $arguments,
};
	94
# Default process expression: a case-insensitive match on file names
# that end in ".bib".
sub get_default_process_exp {
    my $bib_suffix_exp = '(?i)\.bib$';
    return $bib_suffix_exp;
}
	99
	100	# This plugin splits the input text at blank lines
	101	sub get_default_split_exp {
	102	return q^\n\s*\n^;
	103	}
	104
# Constructor.
#
# Appends this class name to $pluginlist and this plugin's argument list
# and option block to the "ArgList" / "OptList" entries of
# $hashArgOptLists, then lets SplitTextFile build the object and blesses
# the result into $class.
sub new {
    my $class = shift;
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push @{$pluginlist}, $class;
    push @{ $hashArgOptLists->{'ArgList'} }, @{$arguments};
    push @{ $hashArgOptLists->{'OptList'} }, $options;

    my $self = SplitTextFile->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
	117
# The process function reads a single bibliographic record and stores
# it in the supplied document object.
#
# Arguments (after $self):
#   $textref  - reference to the text of one Refer record
#   $doc_obj  - document object; this function calls its get_top_section,
#               add_metadata and add_text methods
#   $pluginfo, $base_dir, $file, $metadata, $gli
#             - accepted as part of the calling convention, not used here
#
# Metadata added to the top section:
#   Author       - one value per %A line, as "Lastname, Firstname"
#   Keyword      - one lower-cased value per comma-separated %K entry
#   Creator      - all %A lines joined as "A, B and C"
#   <field name> - for every other recognised tag, the name from %field
#   BookConfOnly / JournalsOnly - set to 1 when a %B / %J field is present
#   FileFormat   - always "Refer"
# The whitespace-normalised record is added as the document text.
#
# Returns undef if the text does not start (after optional whitespace)
# with '%', and 1 once the record has been processed.

sub process {
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    # Check that we're dealing with a valid Refer file
    return undef unless ($$textref =~ /^\s*%/);

    my $cursection = $doc_obj->get_top_section();

    # Refer tag letter => metadata name
    my %field = (
        'H' => 'Header',
        'A' => 'Creator',
        'T' => 'Title',
        'J' => 'Journal',
        'B' => 'Booktitle',
        'R' => 'Report',
        'V' => 'Volume',
        'N' => 'Number',
        'E' => 'Editor',
        'P' => 'Pages',
        'I' => 'Publisher',
        'C' => 'PublisherAddress',
        'D' => 'Date',
        'O' => 'OtherInformation',
        'K' => 'Keywords',
        'X' => 'Abstract',
        '*' => 'Copyright',
    );

    # Tag letter => accumulated values, one per line, each ending in "\n".
    # (Not to be confused with the unused $metadata argument.)
    my %record;
    my $text = "";
    my @lines = split(/\n+/, $$textref);

    # Read and process each line of the record. Most lines consist of a
    # Refer tag followed by a space and the associated data; they are
    # simply stored, though authors and keywords get extra treatment.
    foreach my $line (@lines) {

        $line =~ s/\s+/ /g;
        $text .= "$line\n";

        next unless ($line =~ /^%[A-Z\*]/);
        my $id = substr($line, 1, 1);

        # Strip the tag. The space is optional so that a bare tag such as
        # "%A" yields an empty value instead of leaking the tag itself.
        $line =~ s/^%. ?//;

        # Add individual authors in "Lastname, Firstname" format.
        # (The full set of authors will be added below as "Creator".)
        if ($id eq "A") {
            my @words = split(/ /, $line);
            my $lastname = pop @words;
            $lastname = "" unless (defined $lastname);
            my $firstname = join(" ", @words);

            # Only insert the comma when there is a first name to follow
            # it; otherwise a single-word name would become "Name,".
            my $fullname = ($firstname =~ /\S/)
                ? $lastname . ", " . $firstname
                : $lastname;

            if ($fullname =~ /\w/) {
                $fullname = text_into_html($fullname);
                $doc_obj->add_metadata ($cursection, "Author", $fullname);
            }
        }

        # Add individual keywords.
        # (The full set of keywords will be added below as "Keywords".)
        if ($id eq "K") {
            foreach my $k (split(/,/, $line)) {
                $k = lc($k);
                $k =~ s/\s*$//;
                $k =~ s/^\s*//;
                if ($k =~ /\w/) {
                    $k = text_into_html($k);
                    $doc_obj->add_metadata ($cursection, "Keyword", $k);
                }
            }
        }

        # Add this line of metadata
        $record{$id} .= "$line\n";
    }

    # Add the various fields as metadata. The tags are visited in sorted
    # order so that the metadata is added in a reproducible order.
    foreach my $f (sort keys %record) {

        next unless (defined $field{$f});

        my $name  = $field{$f};
        my $value = $record{$f};

        # The Creator metadata is found by concatenating authors.
        if ($f eq "A") {
            my @authorlist = split(/\n/, $value);
            my $lastauthor = pop @authorlist;
            $lastauthor = "" unless (defined $lastauthor);

            my $creator = scalar(@authorlist)
                ? join(", ", @authorlist) . " and $lastauthor"
                : $lastauthor;

            if ($creator =~ /\w/) {
                $creator = text_into_html($creator);
                $doc_obj->add_metadata ($cursection, "Creator", $creator);
            }
        }

        # The rest are added in a standard way
        else {
            $value = text_into_html($value);
            $doc_obj->add_metadata ($cursection, $name, $value);
        }

        # Books and Journals are additionally marked for display purposes
        if ($f eq "B") {
            $doc_obj->add_metadata($cursection, "BookConfOnly", 1);
        } elsif ($f eq "J") {
            $doc_obj->add_metadata($cursection, "JournalsOnly", 1);
        }
    }

    # Add the text in refer format (all fields)
    if ($text =~ /\w/) {
        $text = text_into_html($text);
        $doc_obj->add_text ($cursection, $text);
    }

    # Add FileFormat as the metadata
    $doc_obj->add_metadata($cursection, "FileFormat", "Refer");

    return 1; # processed the file
}
	266
	267	1;
	268	#
	269	# Convert a text string into HTML.
	270	#
	271	# The HTML is going to be inserted into a GML file, so
	272	# we have to be careful not to use symbols like ">",
	273	# which ocurs frequently in email messages (and use
	274	# &gt instead.
	275	#
	276	# This function also turns links and email addresses into hyperlinks,
	277	# and replaces carriage returns with <BR> tags (and multiple carriage
	278	# returns with <P> tags).
	279	#
	280
	281	sub text_into_html {
	282	my ($text) = @_;
	283
	284
	285	# Convert problem charaters into HTML symbols
	286	$text =~ s/&/&/g;
	287	$text =~ s/</</g;
	288	$text =~ s/>/>/g;
	289	$text =~ s/\"/"/g;
	290	$text =~ s/\'/ /g;
	291	$text =~ s/\+/ /g;
	292	$text =~ s/\(/ /g;
	293	$text =~ s/\)/ /g;
	294
	295	# convert email addresses and URLs into links
	296	$text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
	297	$text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;
	298
	299	# Clean up whitespace and convert \n charaters to <BR> or <P>
	300	$text =~ s/ +/ /g;
	301	$text =~ s/\s*$//;
	302	$text =~ s/^\s*//;
	303	$text =~ s/\n/\n<BR>/g;
	304	$text =~ s/<BR>\s*<BR>/<P>/g;
	305
	306	return $text;
	307	}
	308
	309

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: main/trunk/greenstone2/perllib/plugins/ReferPlugin.pm@ 32584

Download in other formats: