source: main/trunk/greenstone2/perllib/plugins/FOXPlugin.pm@ 31492

Last change on this file since 31492 was 31492, checked in by kjdon, 7 years ago

renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1###########################################################################
2#
3# FOXPlugin.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31package FOXPlugin;
32
33use BaseImporter;
34use util;
35use doc;
36use unicode;
37
38use strict;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
41
42sub BEGIN {
43 @FOXPlugin::ISA = ('BaseImporter');
44}
45
46my $arguments =
47 [ { 'name' => "process_exp",
48 'desc' => "{BaseImporter.process_exp}",
49 'type' => "regexp",
50 'reqd' => "no",
51 'deft' => &get_default_process_exp() },
52 { 'name' => "block_exp",
53 'desc' => "{BaseImporter.block_exp}",
54 'type' => "regexp",
55 'reqd' => "no",
56 'deft' => &get_default_block_exp() } ];
57
58my $options = { 'name' => "FOXPlugin",
59 'desc' => "{FOXPlugin.desc}",
60 'abstract' => "no",
61 'inherits' => "yes",
62 'args' => $arguments };
63
64sub new {
65 my ($class) = shift (@_);
66 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
67 push(@$pluginlist, $class);
68
69 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
70 push(@{$hashArgOptLists->{"OptList"}},$options);
71
72 my $self = new BaseImporter($pluginlist, $inputargs, $hashArgOptLists);
73
74 return bless $self, $class;
75}
76
77sub get_default_process_exp {
78 my $self = shift (@_);
79
80 return q^(?i)\.dbf$^;
81}
82
83#dbt files are processed at the same time as dbf files
84sub get_default_block_exp {
85 my $self = shift (@_);
86
87 return q^(?i)\.dbt$^;
88}
89
90# return number of files processed, undef if can't process
91# Note that $base_dir might be "" and that $file might
92# include directories
93sub read {
94 my $self = shift (@_);
95 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
96
97 # can we process this file??
98 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
99 return undef unless $self->can_process_this_file($filename_full_path);
100
101 print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli);
102 print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1;
103
104 my ($parent_dir) = $filename_full_path =~ /^(.*)\/[^\/]+\.dbf$/i;
105
106 # open the file
107 if (!open (FOXBASEIN, $filename_full_path)) {
108 if ($gli) {
109 print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n";
110 }
111 print STDERR "FOXPlugin::read - couldn't read $filename_full_path\n";
112 return -1; # error in processing
113 }
114
115 # read in the database header
116 my ($temp, %dbf);
117
118 # read in information about dbt file
119 if (read (FOXBASEIN, $temp, 32) < 32) {
120 if ($gli) {
121 print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n";
122 }
123 print STDERR "FOXPlugin::read - eof while reading database header\n";
124 close (FOXBASEIN);
125 return -1;
126 }
127
128 # unpack the header
129 ($dbf{'hasdbt'},
130 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
131 $dbf{'numrecords'}, $dbf{'headerlen'},
132 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
133
134 # process hasdbt
135 if ($dbf{'hasdbt'} == 131) {
136 $dbf{'hasdbt'} = 1;
137 } elsif ($dbf{'hasdbt'} == 3 || $dbf{'hasdbt'} == 48) {
138 $dbf{'hasdbt'} = 0;
139 } else {
140 if ($gli) {
141 print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
142 }
143 print STDERR "FOXPlugin:read - $filename_full_path doesn't seem to be a Foxbase file\n";
144 return -1;
145 }
146
147 # read in the field description
148 $dbf{'numfields'} = 0;
149 $dbf{'fieldinfo'} = [];
150 while (read (FOXBASEIN, $temp, 1) > 0) {
151 last if ($temp eq "\x0d");
152 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
153
154 my %field = ();
155 $field{'name'} = $self->extracttext($temp, 11);
156 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
157 = unpack ("x11a1VCC", $temp);
158
159 push (@{$dbf{'fieldinfo'}}, \%field);
160
161 $dbf{'numfields'} ++;
162 }
163
164 # open the dbt file if we need to
165 my $dbtfullname = $filename_full_path;
166 if ($filename_full_path =~ /f$/) {
167 $dbtfullname =~ s/f$/t/;
168 } else {
169 $dbtfullname =~ s/F$/T/;
170 }
171 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
172 if ($gli) {
173 print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n";
174 }
175 print STDERR "FOXPlugin::read - couldn't read $dbtfullname\n";
176 close (FOXBASEIN);
177 return -1;
178 }
179
180 # read in and process each record in the database
181 my $numrecords = 0;
182 while (($numrecords < $dbf{'numrecords'}) &&
183 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
184
185 # create a new record
186 my $record = [];
187
188 foreach my $field (@{$dbf{'fieldinfo'}}) {
189 my $fieldvalue = "";
190
191 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
192 # a memo field, look up this field in the dbt file
193 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
194
195 $seekpos =~ s/^\s*//;
196 $seekpos = 0 unless $seekpos =~ /^\d+$/;
197
198 $seekpos = $seekpos * 512;
199
200 if ($seekpos == 0) {
201 # there is no memo field
202
203 } elsif (seek (DBTIN, $seekpos, 0)) {
204 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
205 last if ($fieldvalue =~ /\cZ/);
206 }
207
208 # remove everything after the control-Z
209 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
210
211 } else {
212 print STDERR "\nERROR - seek (to $seekpos) failed\n";
213 }
214
215 } else {
216 # a normal field
217 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
218 }
219
220 push (@$record, {%$field, 'value'=>$fieldvalue});
221 }
222
223 # process this record
224 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
225 $numrecords, $record);
226
227 # finished another record...
228 $numrecords++;
229 }
230
231 # close the dbt file if we need to
232 if ($dbf{'hasdbt'}) {
233 close (DBTIN);
234 }
235
236 # close the dbf file
237 close (FOXBASEIN);
238
239 # finished processing
240 return 1;
241}
242
243
244# will extract a string from some larger string, making it
245# conform to a number of constraints
246sub extracttext {
247 my $self = shift (@_);
248 my ($text, $maxlen, $offset, $stopstr) = @_;
249 $offset = 0 unless defined $offset;
250 $stopstr = "\x00" unless defined $stopstr;
251
252 # decide where the string finishes
253 my $end = index ($text, $stopstr, $offset);
254 $end = length ($text) if $end < 0;
255 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
256
257 return "" if ($end <= $offset);
258 return substr ($text, $offset, $end-$offset);
259}
260
261
262# process_record should be overriden for a particular type
263# of database. This default version outputs an html document
264# containing all the fields in the record as a table.
265# It also assumes that the text is in utf-8.
266sub process_record {
267 my $self = shift (@_);
268 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
269
270 # create a new document
271 my $doc_obj = new doc ($file, "indexed_doc", $self->{'file_rename_method'});
272
273 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
274 my $section = $doc_obj->get_top_section();
275
276 $doc_obj->add_metadata($section, "FileFormat", "FOX");
277 $doc_obj->add_metadata($section, "FileSize", (-s $file));
278
279 # start of document
280 $doc_obj->add_utf8_text($section, "<table>\n");
281
282 # add each field
283 foreach my $field (@$record) {
284 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
285 $doc_obj->add_utf8_text($section, " <tr>\n");
286 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
287 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
288 $doc_obj->add_utf8_text($section, " </tr>\n");
289 }
290 }
291
292 # end of document
293 $doc_obj->add_utf8_text($section, "</table>\n");
294
295 # add an object id
296 $self->add_OID($doc_obj);
297
298 # process the document
299 $processor->process($doc_obj);
300}
301
302
3031;
Note: See TracBrowser for help on using the repository browser.