source: trunk/gsdl/perllib/plugins/FOXPlug.pm@ 9853

Last change on this file since 9853 was 9853, checked in by kjdon, 19 years ago

fixed up maxdocs - now pass an extra parameter to the read function

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.1 KB
Line 
1###########################################################################
2#
3# FOXPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31# 12/05/02 Added usage datastructure - John Thompson
32
33package FOXPlug;
34
35use BasPlug;
36use util;
37use doc;
38use unicode;
39use cnseg;
40# use gb;
41
42
43sub BEGIN {
44 @ISA = ('BasPlug');
45}
46
47my $options = { 'name' => "FOXPlug",
48 'desc' => "{FOXPlug.desc}",
49 'abstract' => "no",
50 'inherits' => "yes" };
51
52sub new {
53 my ($class) = @_;
54 $self = new BasPlug ();
55 $self->{'plugin_type'} = "FOXPlug";
56 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
57 my $option_list = $self->{'option_list'};
58 push( @{$option_list}, $options );
59
60 return bless $self, $class;
61}
62
63sub is_recursive {
64 my $self = shift (@_);
65
66 return 0; # this is not a recursive plugin
67}
68
69
70# return number of files processed, undef if can't process
71# Note that $base_dir might be "" and that $file might
72# include directories
73sub read {
74 my $self = shift (@_);
75 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
76 my $fullname = &util::filename_cat ($base_dir, $file);
77
78 # dbt files are processed at the same time as dbf files
79 return 0 if ($fullname =~ /\.dbt$/i);
80
81 # see if this is a foxbase database
82 return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);
83
84 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
85
86 # open the file
87 if (!open (FOXBASEIN, $fullname)) {
88 if ($gli) {
89 print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n";
90 }
91 print STDERR "FOXPlug::read - couldn't read $fullname\n";
92 return -1; # error in processing
93 }
94
95 print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli);
96
97 print STDERR "FOXPlug: processing $file\n";
98
99 # read in the database header
100 my ($temp, %dbf);
101
102 # read in information about dbt file
103 if (read (FOXBASEIN, $temp, 32) < 32) {
104 if ($gli) {
105 print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n";
106 }
107 print STDERR "FOXPlug::read - eof while reading database header\n";
108 close (FOXBASEIN);
109 return -1;
110 }
111
112 # unpack the header
113 ($dbf{'hasdbt'},
114 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
115 $dbf{'numrecords'}, $dbf{'headerlen'},
116 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
117
118 # process hasdbt
119 if ($dbf{'hasdbt'} == 131) {
120 $dbf{'hasdbt'} = 1;
121 } elsif ($dbf{'hasdbt'} == 3 || $dbf{'hasdbt'} == 48) {
122 $dbf{'hasdbt'} = 0;
123 } else {
124 if ($gli) {
125 print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
126 }
127 print STDERR "FOXPlug:read - $fullname doesn't seem to be a Foxbase file\n";
128 return -1;
129 }
130
131 # read in the field description
132 $dbf{'numfields'} = 0;
133 $dbf{'fieldinfo'} = [];
134 while (read (FOXBASEIN, $temp, 1) > 0) {
135 last if ($temp eq "\x0d");
136 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
137
138 my %field = ();
139 $field{'name'} = $self->extracttext($temp, 11);
140 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
141 = unpack ("x11a1VCC", $temp);
142
143 push (@{$dbf{'fieldinfo'}}, \%field);
144
145 $dbf{'numfields'} ++;
146 }
147
148 # open the dbt file if we need to
149 $dbtfullname = $fullname;
150 if ($fullname =~ /f$/) {
151 $dbtfullname =~ s/f$/t/;
152 } else {
153 $dbtfullname =~ s/F$/T/;
154 }
155 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
156 if ($gli) {
157 print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n";
158 }
159 print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
160 close (FOXBASEIN);
161 return -1;
162 }
163
164 # read in and process each record in the database
165 my $numrecords = 0;
166 while (($numrecords < $dbf{'numrecords'}) &&
167 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
168
169 # create a new record
170 my $record = [];
171
172 foreach $field (@{$dbf{'fieldinfo'}}) {
173 my $fieldvalue = "";
174
175 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
176 # a memo field, look up this field in the dbt file
177 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
178
179 $seekpos =~ s/^\s*//;
180 $seekpos = 0 unless $seekpos =~ /^\d+$/;
181
182 $seekpos = $seekpos * 512;
183
184 if ($seekpos == 0) {
185 # there is no memo field
186
187 } elsif (seek (DBTIN, $seekpos, 0)) {
188 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
189 last if ($fieldvalue =~ /\cZ/);
190 }
191
192 # remove everything after the control-Z
193 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
194
195 } else {
196 print STDERR "\nERROR - seek (to $seekpos) failed\n";
197 }
198
199 } else {
200 # a normal field
201 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
202 }
203
204 push (@$record, {%$field, 'value'=>$fieldvalue});
205 }
206
207 # process this record
208 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
209 $numrecords, $record);
210
211 # finished another record...
212 $numrecords++;
213 }
214
215 # close the dbt file if we need to
216 if ($dbf{'hasdbt'}) {
217 close (DBTIN);
218 }
219
220 # close the dbf file
221 close (FOXBASEIN);
222
223 # finished processing
224 return 1;
225}
226
227
228# will extract a string from some larger string, making it
229# conform to a number of constraints
230sub extracttext {
231 my $self = shift (@_);
232 my ($text, $maxlen, $offset, $stopstr) = @_;
233 $offset = 0 unless defined $offset;
234 $stopstr = "\x00" unless defined $stopstr;
235
236 # decide where the string finishes
237 my $end = index ($text, $stopstr, $offset);
238 $end = length ($text) if $end < 0;
239 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
240
241 return "" if ($end <= $offset);
242 return substr ($text, $offset, $end-$offset);
243}
244
245
246# process_record should be overriden for a particular type
247# of database. This default version outputs an html document
248# containing all the fields in the record as a table.
249# It also assumes that the text is in utf-8.
250sub process_record {
251 my $self = shift (@_);
252 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
253
254 # create a new document
255 my $doc_obj = new doc ($file, "indexed_doc");
256 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
257 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
258 my $section = $doc_obj->get_top_section();
259
260 $doc_obj->add_metadata($section, "FileFormat", "FOX");
261 $doc_obj->add_metadata($section, "FileSize", (-s $file));
262
263 # start of document
264 $doc_obj->add_utf8_text($section, "<table>\n");
265
266 # add each field
267 foreach $field (@$record) {
268 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
269 $doc_obj->add_utf8_text($section, " <tr>\n");
270 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
271 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
272 $doc_obj->add_utf8_text($section, " </tr>\n");
273 }
274 }
275
276 # end of document
277 $doc_obj->add_utf8_text($section, "</table>\n");
278
279 # add an object id
280 $doc_obj->set_OID();
281
282 # process the document
283 $processor->process($doc_obj);
284}
285
286
2871;
Note: See TracBrowser for help on using the repository browser.