Context Navigation

source: main/trunk/greenstone2/perllib/plugins/FOXPlugin.pm@ 31492

Last change on this file since 31492 was 31492, checked in by kjdon, 7 years ago
renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 8.7 KB

Line
1	###########################################################################
2	#
3	# FOXPlugin.pm
4	# A component of the Greenstone digital library software
5	# from the New Zealand Digital Library Project at the
6	# University of Waikato, New Zealand.
7	#
8	# Copyright (C) 1999 New Zealand Digital Library Project
9	#
10	# This program is free software; you can redistribute it and/or modify
11	# it under the terms of the GNU General Public License as published by
12	# the Free Software Foundation; either version 2 of the License, or
13	# (at your option) any later version.
14	#
15	# This program is distributed in the hope that it will be useful,
16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	# GNU General Public License for more details.
19	#
20	# You should have received a copy of the GNU General Public License
21	# along with this program; if not, write to the Free Software
22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	#
24	###########################################################################
25
26	# plugin to process a Foxbase dbt file. This plugin provides the basic
27	# functionality to read in the dbt and dbf files and process each record.
28	# This general plugin should be overridden for a particular database to process
29	# the appropriate fields in the file.
30
31	package FOXPlugin;
32
33	use BaseImporter;
34	use util;
35	use doc;
36	use unicode;
37
38	use strict;
39	no strict 'refs'; # allow filehandles to be variables and viceversa
40
41
42	sub BEGIN {
43	@FOXPlugin::ISA = ('BaseImporter');
44	}
45
46	my $arguments =
47	[ { 'name' => "process_exp",
48	'desc' => "{BaseImporter.process_exp}",
49	'type' => "regexp",
50	'reqd' => "no",
51	'deft' => &get_default_process_exp() },
52	{ 'name' => "block_exp",
53	'desc' => "{BaseImporter.block_exp}",
54	'type' => "regexp",
55	'reqd' => "no",
56	'deft' => &get_default_block_exp() } ];
57
58	my $options = { 'name' => "FOXPlugin",
59	'desc' => "{FOXPlugin.desc}",
60	'abstract' => "no",
61	'inherits' => "yes",
62	'args' => $arguments };
63
64	sub new {
65	my ($class) = shift (@_);
66	my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
67	push(@$pluginlist, $class);
68
69	push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
70	push(@{$hashArgOptLists->{"OptList"}},$options);
71
72	my $self = new BaseImporter($pluginlist, $inputargs, $hashArgOptLists);
73
74	return bless $self, $class;
75	}
76
77	sub get_default_process_exp {
78	my $self = shift (@_);
79
80	return q^(?i)\.dbf$^;
81	}
82
83	#dbt files are processed at the same time as dbf files
84	sub get_default_block_exp {
85	my $self = shift (@_);
86
87	return q^(?i)\.dbt$^;
88	}
89
90	# return number of files processed, undef if can't process
91	# Note that $base_dir might be "" and that $file might
92	# include directories
93	sub read {
94	my $self = shift (@_);
95	my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
96
97	# can we process this file??
98	my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
99	return undef unless $self->can_process_this_file($filename_full_path);
100
101	print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli);
102	print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1;
103
104	my ($parent_dir) = $filename_full_path =~ /^(.*)\/[^\/]+\.dbf$/i;
105
106	# open the file
107	if (!open (FOXBASEIN, $filename_full_path)) {
108	if ($gli) {
109	print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n";
110	}
111	print STDERR "FOXPlugin::read - couldn't read $filename_full_path\n";
112	return -1; # error in processing
113	}
114
115	# read in the database header
116	my ($temp, %dbf);
117
118	# read in information about dbt file
119	if (read (FOXBASEIN, $temp, 32) < 32) {
120	if ($gli) {
121	print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n";
122	}
123	print STDERR "FOXPlugin::read - eof while reading database header\n";
124	close (FOXBASEIN);
125	return -1;
126	}
127
128	# unpack the header
129	($dbf{'hasdbt'},
130	$dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
131	$dbf{'numrecords'}, $dbf{'headerlen'},
132	$dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
133
134	# process hasdbt
135	if ($dbf{'hasdbt'} == 131) {
136	$dbf{'hasdbt'} = 1;
137	} elsif ($dbf{'hasdbt'} == 3 \|\| $dbf{'hasdbt'} == 48) {
138	$dbf{'hasdbt'} = 0;
139	} else {
140	if ($gli) {
141	print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
142	}
143	print STDERR "FOXPlugin:read - $filename_full_path doesn't seem to be a Foxbase file\n";
144	return -1;
145	}
146
147	# read in the field description
148	$dbf{'numfields'} = 0;
149	$dbf{'fieldinfo'} = [];
150	while (read (FOXBASEIN, $temp, 1) > 0) {
151	last if ($temp eq "\x0d");
152	last if (read (FOXBASEIN, $temp, 31, 1) < 31);
153
154	my %field = ();
155	$field{'name'} = $self->extracttext($temp, 11);
156	($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
157	= unpack ("x11a1VCC", $temp);
158
159	push (@{$dbf{'fieldinfo'}}, \%field);
160
161	$dbf{'numfields'} ++;
162	}
163
164	# open the dbt file if we need to
165	my $dbtfullname = $filename_full_path;
166	if ($filename_full_path =~ /f$/) {
167	$dbtfullname =~ s/f$/t/;
168	} else {
169	$dbtfullname =~ s/F$/T/;
170	}
171	if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
172	if ($gli) {
173	print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n";
174	}
175	print STDERR "FOXPlugin::read - couldn't read $dbtfullname\n";
176	close (FOXBASEIN);
177	return -1;
178	}
179
180	# read in and process each record in the database
181	my $numrecords = 0;
182	while (($numrecords < $dbf{'numrecords'}) &&
183	(read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
184
185	# create a new record
186	my $record = [];
187
188	foreach my $field (@{$dbf{'fieldinfo'}}) {
189	my $fieldvalue = "";
190
191	if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
192	# a memo field, look up this field in the dbt file
193	my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
194
195	$seekpos =~ s/^\s*//;
196	$seekpos = 0 unless $seekpos =~ /^\d+$/;
197
198	$seekpos = $seekpos * 512;
199
200	if ($seekpos == 0) {
201	# there is no memo field
202
203	} elsif (seek (DBTIN, $seekpos, 0)) {
204	while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
205	last if ($fieldvalue =~ /\cZ/);
206	}
207
208	# remove everything after the control-Z
209	substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
210
211	} else {
212	print STDERR "\nERROR - seek (to $seekpos) failed\n";
213	}
214
215	} else {
216	# a normal field
217	$fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
218	}
219
220	push (@$record, {%$field, 'value'=>$fieldvalue});
221	}
222
223	# process this record
224	$self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
225	$numrecords, $record);
226
227	# finished another record...
228	$numrecords++;
229	}
230
231	# close the dbt file if we need to
232	if ($dbf{'hasdbt'}) {
233	close (DBTIN);
234	}
235
236	# close the dbf file
237	close (FOXBASEIN);
238
239	# finished processing
240	return 1;
241	}
242
243
244	# will extract a string from some larger string, making it
245	# conform to a number of constraints
246	sub extracttext {
247	my $self = shift (@_);
248	my ($text, $maxlen, $offset, $stopstr) = @_;
249	$offset = 0 unless defined $offset;
250	$stopstr = "\x00" unless defined $stopstr;
251
252	# decide where the string finishes
253	my $end = index ($text, $stopstr, $offset);
254	$end = length ($text) if $end < 0;
255	$end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
256
257	return "" if ($end <= $offset);
258	return substr ($text, $offset, $end-$offset);
259	}
260
261
262	# process_record should be overriden for a particular type
263	# of database. This default version outputs an html document
264	# containing all the fields in the record as a table.
265	# It also assumes that the text is in utf-8.
266	sub process_record {
267	my $self = shift (@_);
268	my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
269
270	# create a new document
271	my $doc_obj = new doc ($file, "indexed_doc", $self->{'file_rename_method'});
272
273	$doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
274	my $section = $doc_obj->get_top_section();
275
276	$doc_obj->add_metadata($section, "FileFormat", "FOX");
277	$doc_obj->add_metadata($section, "FileSize", (-s $file));
278
279	# start of document
280	$doc_obj->add_utf8_text($section, "<table>\n");
281
282	# add each field
283	foreach my $field (@$record) {
284	if (defined ($field->{'name'}) && defined ($field->{'value'})) {
285	$doc_obj->add_utf8_text($section, " <tr>\n");
286	$doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
287	$doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
288	$doc_obj->add_utf8_text($section, " </tr>\n");
289	}
290	}
291
292	# end of document
293	$doc_obj->add_utf8_text($section, "</table>\n");
294
295	# add an object id
296	$self->add_OID($doc_obj);
297
298	# process the document
299	$processor->process($doc_obj);
300	}
301
302
303	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: