Context Navigation

source: gsdl/trunk/perllib/plugins/ProCitePlugin.pm@ 16104

Last change on this file since 16104 was 16104, checked in by kjdon, 16 years ago
tried to make the 'xxxplugin processing file' print statements more consistent. They are now done in read (or read_into_doc_obj) and not process
Property svn:keywords set to `Author Date Id Revision`
File size: 7.0 KB

Line
1	###########################################################################
2	#
3	# ProCitePlugin.pm -- A plugin for (exported) ProCite databases
4	#
5	# A component of the Greenstone digital library software
6	# from the New Zealand Digital Library Project at the
7	# University of Waikato, New Zealand.
8	#
9	# Copyright 1999-2004 New Zealand Digital Library Project
10	#
11	# This program is free software; you can redistribute it and/or modify
12	# it under the terms of the GNU General Public License as published by
13	# the Free Software Foundation; either version 2 of the License, or
14	# (at your option) any later version.
15	#
16	# This program is distributed in the hope that it will be useful,
17	# but WITHOUT ANY WARRANTY; without even the implied warranty of
18	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	# GNU General Public License for more details.
20	#
21	# You should have received a copy of the GNU General Public License
22	# along with this program; if not, write to the Free Software
23	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24	#
25	###########################################################################
26
27	package ProCitePlugin;
28
29
30	use multiread;
31	use SplitTextFile;
32
33	use strict;
34	no strict 'refs'; # allow filehandles to be variables and viceversa
35
# Set up inheritance at compile time: ProCitePlugin derives from
# SplitTextFile.
BEGIN {
    @ProCitePlugin::ISA = qw(SplitTextFile);
}
40
41
# Argument descriptions contributed by this plugin.  Each entry gives the
# argument name, a description key, its type, whether it is required and
# its default value (taken from the get_default_* subs of this package).
my $arguments = [
    {
        name => "process_exp",
        desc => "{BasePlugin.process_exp}",
        type => "regexp",
        reqd => "no",
        deft => get_default_process_exp(),
    },
    {
        name => "split_exp",
        desc => "{SplitTextFile.split_exp}",
        type => "regexp",
        deft => get_default_split_exp(),
        reqd => "no",
    },
];

# Plugin-level options; 'args' points at the argument list above.  Both
# structures are handed to the parent constructor by new().
my $options = {
    name     => "ProCitePlugin",
    desc     => "{ProCitePlugin.desc}",
    abstract => "no",
    inherits => "yes",
    explodes => "yes",
    args     => $arguments,
};
61
62
# Default for the process_exp argument: a case-insensitive pattern that
# matches file names ending in ".txt" (exported ProCite files).
sub get_default_process_exp {
    my $txt_suffix_pattern = '(?i)(\.txt)$';
    return $txt_suffix_pattern;
}
68
69
# Default for the split_exp argument: the two-character regexp source
# "\n", so the input text is split at every line.
sub get_default_split_exp {
    my $newline_pattern = '\n';
    return $newline_pattern;
}
75
76
# Constructor.
#
# Arguments:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to SplitTextFile's constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option descriptions
#
# Returns the object built by SplitTextFile, re-blessed into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    # Register this class before delegating to the parent constructor.
    push @{$pluginlist}, $class;

    # Contribute this plugin's argument and option descriptions.
    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    my $plugin = SplitTextFile->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless($plugin, $class);
}
90
91
# Maps the single-letter workform indicators found at the start of a record
# to the corresponding workform names.
my %crazy_workform_mapping = (
    "A" => "Book, Long Form",
    "B" => "Book, Short Form",
    "C" => "Journal, Long Form",
    "D" => "Journal, Short Form",
    "E" => "Report",
    "F" => "Newspaper",
    "G" => "Dissertation",
    "H" => "Trade Catalog",
    "I" => "Letter (Correspondence)",
    "J" => "Manuscript",
    "K" => "Conference Proceedings",
    "L" => "Map",
    "M" => "Music Score",
    "N" => "Sound Recording",
    "O" => "Motion Picture",
    "P" => "Audiovisual Material",
    "Q" => "Video Recording",
    "R" => "Art Work",
    "S" => "Computer Program",
    "T" => "Data File",
);
113
114
# Read an exported ProCite file into $$textref and pull the workform
# definitions off the front of the text.
#
# Arguments (after $self):
#   $filename - path of the file to read
#   $encoding - encoding handed to the multiread reader
#   $language - accepted for interface compatibility; not used here
#   $textref  - reference to the scalar that receives the file contents
#
# Every leading line of the form
#   <Workform Definition>"Name","Label 1","Label 2",...
# is removed from $$textref (so it is not later treated as a record) and
# stored as  Name => [ "Label 1", "Label 2", ... ]  in
# $self->{'workform_definitions'}->{$filename}.
#
# Returns the reference to that (possibly empty) definitions hash.  If the
# file cannot be opened, a message is printed to STDERR, $$textref is left
# untouched and the definitions hash is empty.
sub read_file
{
    my $self = shift (@_);
    my ($filename, $encoding, $language, $textref) = @_;

    # Store the workform definitions for this file.  Registered up front so
    # that an entry exists for $filename even when reading fails.
    my %workform_definitions = ();
    $self->{'workform_definitions'}->{$filename} = \%workform_definitions;

    # Read the contents of the file into $textref.  Three-argument open
    # keeps odd characters in the file name from being taken as a mode.
    if (!open(PROCITE_FILE, '<', $filename)) {
        print STDERR "ProCitePlugin: could not open $filename for reading: $!\n";
        return \%workform_definitions;
    }
    my $reader = multiread->new();
    # The handle is passed by its package-qualified name.
    $reader->set_handle ('ProCitePlugin::PROCITE_FILE');
    $reader->set_encoding ($encoding);
    $reader->read_file ($textref);
    close(PROCITE_FILE);

    # Read the workform definitions at the start of the file.  Each pass
    # removes one definition line; the newline is optional so a definition
    # on the very last line (no trailing newline) cannot loop forever.
    while ($$textref =~ s/^\<Workform Definition\>(.*)\n?//) {
        my $workform_definition = $1;

        # The first quoted value is the workform name.
        unless ($workform_definition =~ s/^\"([^\"]*)\",?//) {
            print STDERR "ProCitePlugin: ignoring malformed workform definition in $filename\n";
            next;
        }
        my $workform_name = $1;

        # The remaining quoted values are the field labels, in order.
        my @workform_values;
        while ($workform_definition !~ /^\s*$/) {
            # Stop on anything that is not a quoted value; otherwise the
            # remainder would never shrink and the loop would never end.
            unless ($workform_definition =~ s/^\"([^\"]*)\",?//) {
                print STDERR "ProCitePlugin: unparseable text in workform definition \"$workform_name\" in $filename\n";
                last;
            }
            push(@workform_values, $1);
        }

        # Remember this workform definition for when we're reading the records
        $workform_definitions{$workform_name} = \@workform_values;
    }

    return \%workform_definitions;
}
152
153
# Turn one ProCite record (one line of the exported file) into document
# text and metadata on $doc_obj.
#
# Arguments (after $self):
#   $textref  - reference to the record text; it is consumed (modified in
#               place) while the record is parsed
#   $base_dir - joined with $file to form the file name under which
#   $file       read_file() stored the workform definitions
#   $doc_obj  - document object that receives text and metadata
#   $pluginfo, $metadata, $gli - accepted for interface compatibility; not
#               used here
#
# A record has the form
#   "<workform>","<record number>","<field 1>","<field 2>",...
# The workform is either a single-letter code listed in
# %crazy_workform_mapping or a workform name; it must have a definition
# stored by read_file().  Non-empty fields become pc.FieldNValue metadata
# (values separated by "//" are stored individually) and their labels
# become pc.FieldNName metadata.  An HTML table view of the record is
# stored as HTMLDisplay metadata.
#
# Returns 1 when the record was processed, 0 when the record header is
# malformed or the workform is unknown.
sub process
{
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $filename = util::filename_cat($base_dir, $file);
    my $cursection = $doc_obj->get_top_section();

    # Build up an HTML view of the record for easy display at run-time
    my $html_record = "<table>";

    # Read the record's workform indicator and record number.  Both may be
    # longer than one character (e.g. record number 10), and a failed match
    # must not leave stale capture values behind.
    unless ($$textref =~ s/^\"([^\"]*)\",\"([^\"]*)\",?//) {
        print STDERR "ProCitePlugin: malformed record in $filename, skipping\n";
        return 0;
    }
    my ($workform_indicator, $recordnum) = ($1, $2);

    # If necessary, map the workform indicator into something useful
    if ($crazy_workform_mapping{$workform_indicator}) {
        $workform_indicator = $crazy_workform_mapping{$workform_indicator};
    }

    # Check we know about the workform of this record.  Work through the
    # stored reference rather than copying the whole hash for every record;
    # fall back to an empty hash if nothing was stored for this file.
    my $workform_definitions = $self->{'workform_definitions'}->{$filename} || {};
    my $workform_fields = $workform_definitions->{$workform_indicator};
    if (!$workform_fields) {
        print STDERR "Unknown workform $workform_indicator!\n";
        return 0;
    }

    # Store the record (minus workform indicator and record number) as the
    # document text
    $doc_obj->add_utf8_text($cursection, $$textref);

    # Store workform and record number as metadata
    $doc_obj->add_utf8_metadata($cursection, "pc.Workform", $workform_indicator);
    $doc_obj->add_utf8_metadata($cursection, "pc.RecordNumber", $recordnum);

    # Store FileFormat metadata
    $doc_obj->add_metadata($cursection, "FileFormat", "ProCite");

    $html_record .= "<tr><td valign=top><b>Record Number: </b></td><td valign=top>$recordnum</td></tr>";

    # Read each field (surrounded by quotes) of the record
    my $fieldnum = 0;
    while ($$textref !~ /^\s*$/) {
        # Stop on anything that is not a quoted value; otherwise the text
        # would never shrink and the loop would never end.
        unless ($$textref =~ s/^\"([^\"]*)\",?//) {
            print STDERR "ProCitePlugin: unparseable data in record $recordnum of $filename, ignoring the rest of the record\n";
            last;
        }
        my $field_value_raw = $1;

        # Add non-empty metadata values to the document
        unless ($field_value_raw eq "") {
            # The record may hold more fields than the workform defines;
            # such fields have no label.
            my $field_name = $workform_fields->[$fieldnum];
            my $has_label = defined $field_name;
            $field_name = "" unless $has_label;

            # Add the display name of the metadata field for format
            # statement convenience ("---" marks a field without a name)
            if ($has_label && $field_name ne "---") {
                my $name_meta = "pc.Field" . ($fieldnum + 1) . "Name";
                $doc_obj->add_utf8_metadata($cursection, $name_meta, $field_name);
            }

            $html_record .= "<tr><td valign=top><b>$field_name: </b></td><td valign=top>";

            # Multiple metadata values are separated with "//"
            my $value_meta = "pc.Field" . ($fieldnum + 1) . "Value";
            foreach my $field_value (split(/\/\//, $field_value_raw)) {
                $doc_obj->add_utf8_metadata($cursection, $value_meta, $field_value);

                $html_record .= $field_value . "<br>";
            }

            $html_record .= "</td></tr>";
        }

        $fieldnum++;
    }

    $html_record .= "</table>";
    # Store HTML view of record as metadata
    $doc_obj->add_utf8_metadata($cursection, "HTMLDisplay", $html_record);

    # Record was processed successfully
    return 1;
}
235
236
237	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: