Context Navigation

ProCitePlugin.pm@ 32280

Last change on this file since 32280 was 31492, checked in by kjdon, 7 years ago
renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes
Property svn:keywords set to `Author Date Id Revision`
File size: 7.6 KB

Line
1	###########################################################################
2	#
3	# ProCitePlugin.pm -- A plugin for (exported) ProCite databases
4	#
5	# A component of the Greenstone digital library software
6	# from the New Zealand Digital Library Project at the
7	# University of Waikato, New Zealand.
8	#
9	# Copyright 1999-2004 New Zealand Digital Library Project
10	#
11	# This program is free software; you can redistribute it and/or modify
12	# it under the terms of the GNU General Public License as published by
13	# the Free Software Foundation; either version 2 of the License, or
14	# (at your option) any later version.
15	#
16	# This program is distributed in the hope that it will be useful,
17	# but WITHOUT ANY WARRANTY; without even the implied warranty of
18	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	# GNU General Public License for more details.
20	#
21	# You should have received a copy of the GNU General Public License
22	# along with this program; if not, write to the Free Software
23	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24	#
25	###########################################################################
26
27	package ProCitePlugin;
28
29
30	use multiread;
31	use SplitTextFile;
32	use MetadataRead;
33
34	use strict;
35	no strict 'refs'; # allow filehandles to be variables and viceversa
36
# Establish the inheritance chain at compile time. Method lookup tries
# MetadataRead first, then SplitTextFile (the class whose constructor
# new() calls).
sub BEGIN {
    @ProCitePlugin::ISA = qw(MetadataRead SplitTextFile);
}
41
42
# Argument descriptions contributed by this plugin. new() appends them to
# the caller's "ArgList". The 'desc' values are lookup keys written in
# braces, not literal descriptions.
my $arguments = [
    # Overrides of the inherited regular-expression options, so that this
    # plugin's own defaults are used.
    {
        'name' => 'process_exp',
        'desc' => '{BaseImporter.process_exp}',
        'type' => 'regexp',
        'reqd' => 'no',
        'deft' => get_default_process_exp(),
    },
    {
        'name' => 'split_exp',
        'desc' => '{SplitTextFile.split_exp}',
        'type' => 'regexp',
        'reqd' => 'no',
        'deft' => get_default_split_exp(),
    },

    # The interesting options
    {
        'name' => 'entry_separator',
        'desc' => '{ProCitePlugin.entry_separator}',
        'type' => 'string',
        'reqd' => 'no',
        'deft' => '//',
    },
];
62
# Plugin-level option record. new() appends it to the caller's "OptList".
# It bundles the plugin name, its description key, a few yes/no flags and
# the argument list declared above.
my $options = {
    'name'     => 'ProCitePlugin',
    'desc'     => '{ProCitePlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'explodes' => 'yes',
    'args'     => $arguments,
};
69
70
# Default value of the "process_exp" option: a case-insensitive pattern
# matching file names that end in ".txt" (exported ProCite files).
# Returns the pattern source as a plain string, not a compiled regex.
sub get_default_process_exp
{
    my $txt_suffix_pattern = q{(?i)(\.txt)$};
    return $txt_suffix_pattern;
}
76
77
# Default value of the "split_exp" option. The returned string is the
# two-character pattern source \n (backslash followed by 'n'), so that the
# input text is split at every line.
sub get_default_split_exp
{
    my $newline_pattern = '\n';
    return $newline_pattern;
}
83
84
# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through untouched to the SplitTextFile constructor
#   $hashArgOptLists - hash ref; this plugin's argument descriptions are
#                      appended to its "ArgList" entry and its option record
#                      to its "OptList" entry
#
# Returns the object built by SplitTextFile, re-blessed into $class.
sub new
{
    my $class = shift;
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    # Register this class and what it contributes before delegating
    push @{$pluginlist}, $class;
    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    my $self = SplitTextFile->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless($self, $class);
}
98
99
# Maps the single-letter workform indicators that may start a record to
# descriptive workform names. process() applies this mapping before
# looking the workform up in the definitions read from the file.
my %crazy_workform_mapping = (
    'A' => 'Book, Long Form',
    'B' => 'Book, Short Form',
    'C' => 'Journal, Long Form',
    'D' => 'Journal, Short Form',
    'E' => 'Report',
    'F' => 'Newspaper',
    'G' => 'Dissertation',
    'H' => 'Trade Catalog',
    'I' => 'Letter (Correspondence)',
    'J' => 'Manuscript',
    'K' => 'Conference Proceedings',
    'L' => 'Map',
    'M' => 'Music Score',
    'N' => 'Sound Recording',
    'O' => 'Motion Picture',
    'P' => 'Audiovisual Material',
    'Q' => 'Video Recording',
    'R' => 'Art Work',
    'S' => 'Computer Program',
    'T' => 'Data File',
);
121
122
# Read an exported ProCite file into $$textref and strip the leading
# "<Workform Definition>" lines from it.
#
#   $filename - path of the file to read
#   $encoding - handed to the multiread reader
#   $language - accepted but not used here
#   $textref  - scalar ref that receives the file contents; on return the
#               workform definition lines have been removed from it
#
# Each definition line has the form
#   <Workform Definition>"Workform name","Field 1","Field 2",...
# The parsed definitions (workform name => array ref of field names) are
# stored in $self->{'workform_definitions'}->{$filename} for process().
#
# If the file cannot be opened, a message is printed to STDERR, an empty
# definition table is stored for $filename and $$textref is left untouched.
sub read_file
{
    my $self = shift (@_);
    my ($filename, $encoding, $language, $textref) = @_;

    # Store the workform definitions for this file
    my %workform_definitions = ();

    # Read the contents of the file into $textref.
    # Three-argument open so that unusual characters in $filename cannot be
    # misread as an open mode. The bareword handle is kept because the reader
    # is given the handle by its (package-qualified) name.
    if (!open(PROCITE_FILE, '<', $filename)) {
        print STDERR "ProCitePlugin: could not open $filename for reading: $!\n";
        $self->{'workform_definitions'}->{$filename} = \%workform_definitions;
        return;
    }
    my $reader = multiread->new();
    $reader->set_handle ('ProCitePlugin::PROCITE_FILE');
    $reader->set_encoding ($encoding);
    $reader->read_file ($textref);
    close(PROCITE_FILE);

    # Read the workform definitions at the start of the file. Each pass
    # removes one definition line so it is not processed later as a record.
    # The newline is optional so that a definition on the very last line of a
    # file without a trailing newline is still consumed (otherwise this loop
    # would never terminate).
    while ($$textref =~ s/^\<Workform Definition\>(.*)\n?//) {
        my $workform_definition = $1;

        # The first quoted value is the workform name
        my $workform_name =
            ($workform_definition =~ s/^\"([^\"]*)\",?//) ? $1 : undef;
        if (!defined $workform_name) {
            print STDERR "ProCitePlugin: ignoring malformed workform definition in $filename\n";
            next;
        }

        # The remaining quoted values are the field names, in order
        my @workform_values;
        while ($workform_definition !~ /^\s*$/) {
            my $workform_field =
                ($workform_definition =~ s/^\"([^\"]*)\",?//) ? $1 : undef;
            if (!defined $workform_field) {
                # Nothing was consumed, so stop rather than loop forever
                print STDERR "ProCitePlugin: ignoring malformed text in definition of workform $workform_name in $filename\n";
                last;
            }
            push(@workform_values, $workform_field);
        }

        # Remember this workform definition for when we're reading the records
        $workform_definitions{$workform_name} = \@workform_values;
    }

    $self->{'workform_definitions'}->{$filename} = \%workform_definitions;
}
160
161
# Turn one ProCite record into document text and metadata.
#
#   $textref  - scalar ref to the record: a comma-separated list of quoted
#               values, starting with the workform indicator and optionally
#               a numeric record number. The text is consumed destructively
#               while it is parsed.
#   $base_dir, $file - joined with util::filename_cat to form the key under
#               which read_file() stored this file's workform definitions
#   $doc_obj  - document object that receives the text and metadata
#   $pluginfo, $metadata, $gli - accepted but not used here
#
# Returns 1 when the record was processed, 0 when it does not start with a
# workform indicator or its workform has no definition for this file.
sub process
{
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $filename = &util::filename_cat($base_dir, $file);
    my $cursection = $doc_obj->get_top_section();

    # Build up an HTML view of the record for easy display at run-time
    my $html_record = "<table>";

    # Read the record's workform indicator. The capture is only used when
    # the substitution actually matched, so a stale $1 is never picked up.
    my $workform_indicator =
        ($$textref =~ s/^\"([^\"]*)\",//) ? $1 : undef;
    if (!defined $workform_indicator) {
        print STDERR "ProCitePlugin: record in $filename does not start with a workform indicator\n";
        return 0;
    }

    # Some procite files have a record number next. Note that s/// returns
    # the number of substitutions, so the number itself must come from $1.
    my $recordnum =
        ($$textref =~ s/^\"(\d*)\",//) ? $1 : "undefined";

    # If necessary, map the workform indicator into something useful
    if ($crazy_workform_mapping{$workform_indicator}) {
        $workform_indicator = $crazy_workform_mapping{$workform_indicator};
    }

    # Check we know about the workform of this record
    my $workform_definitions = $self->{'workform_definitions'}->{$filename} || {};
    my $workform_fields = $workform_definitions->{$workform_indicator};
    if (!$workform_fields) {
        print STDERR "Unknown workform $workform_indicator!\n";
        return 0;
    }

    # Store the full record (minus the leading values consumed above) as
    # the document text
    $doc_obj->add_utf8_text($cursection, $$textref);

    # Store workform and record number as metadata
    $doc_obj->add_utf8_metadata($cursection, "Workform", $workform_indicator);
    $doc_obj->add_utf8_metadata($cursection, "RecordNumber", $recordnum);

    # Store FileFormat metadata
    $doc_obj->add_metadata($cursection, "FileFormat", "ProCite");

    $html_record .= "<tr><td valign=top><b>Record Number: </b></td><td valign=top>$recordnum</td></tr>";

    my @workform_values = @{$workform_fields};

    # Read each field (surrounded by quotes) of the record
    my $fieldnum = 0;
    while ($$textref !~ /^\s*$/) {
        my $field_value_raw =
            ($$textref =~ s/^\"([^\"]*)\",?//) ? $1 : undef;
        if (!defined $field_value_raw) {
            # Nothing was consumed (e.g. a quote inside a field), so stop
            # here instead of looping forever on the same text
            print STDERR "ProCitePlugin: ignoring malformed text in record $recordnum of $filename\n";
            last;
        }

        # Add non-empty metadata values to the document
        unless ($field_value_raw eq "") {
            # Use the display name of the field as the metadata name. Fields
            # the workform leaves unnamed ("---"), or that lie beyond the
            # end of the workform definition, are called "Field<N>".
            my $field_name = $workform_values[$fieldnum];
            if (!defined $field_name || $field_name eq "---") {
                $field_name = "Field" . ($fieldnum + 1);
            }
            $html_record .= "<tr><td valign=top><b>$field_name: </b></td><td valign=top>";

            # Multiple metadata values are separated with the
            # entry_separator option ("//" by default).
            # NOTE(review): split treats the separator as a regex pattern
            # although the option is declared as a string - confirm that
            # this is intended before quoting it.
            foreach my $field_value (split($self->{'entry_separator'}, $field_value_raw)) {
                $doc_obj->add_utf8_metadata($cursection, $field_name, $field_value);
                $html_record .= $field_value . "<br>";
            }

            $html_record .= "</td></tr>";
        }

        $fieldnum++;
    }

    $html_record .= "</table>";
    # Store HTML view of record as metadata
    $doc_obj->add_utf8_metadata($cursection, "HTMLDisplay", $html_record);

    # Record was processed successfully
    return 1;
}
251
252
253	1;

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: main/trunk/greenstone2/perllib/plugins/ProCitePlugin.pm@ 32280

Download in other formats: