source: trunk/gsdl/perllib/plugins/FOXPlug.pm@ 5096

Last change on this file since 5096 was 4744, checked in by mdewsnip, 21 years ago

Tidied up and structures (representing the options of the plugin) in preparation for removing the print_usage() routines.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.6 KB
Line 
1###########################################################################
2#
3# FOXPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31# 12/05/02 Added usage datastructure - John Thompson
32
33package FOXPlug;
34
35use BasPlug;
36use util;
37use doc;
38use unicode;
39use cnseg;
40# use gb;
41
42
43sub BEGIN {
44 @ISA = ('BasPlug');
45}
46
47my $options = { 'name' => "FOXPlug",
48 'desc' => "Plugin to process a Foxbase dbt file. This plugin provides the basic functionality to read in the dbt and dbf files and process each record. This general plugin should be overridden for a particular database to process the appropriate fields in the file.",
49 'inherits' => "yes" };
50
51sub new {
52 my ($class) = @_;
53 $self = new BasPlug ();
54
55 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
56 my $option_list = $self->{'option_list'};
57 push( @{$option_list}, $options );
58
59 return bless $self, $class;
60}
61
62sub is_recursive {
63 my $self = shift (@_);
64
65 return 0; # this is not a recursive plugin
66}
67
68
69# return number of files processed, undef if can't process
70# Note that $base_dir might be "" and that $file might
71# include directories
72sub read {
73 my $self = shift (@_);
74 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
75 my $fullname = &util::filename_cat ($base_dir, $file);
76
77 # dbt files are processed at the same time as dbf files
78 return 0 if ($fullname =~ /\.dbt$/i);
79
80 # see if this is a foxbase database
81 return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);
82
83 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
84
85 # open the file
86 if (!open (FOXBASEIN, $fullname)) {
87 print STDERR "FOXPlug::read - couldn't read $fullname\n";
88 return undef;
89 }
90
91 print STDERR "FOXPlug: processing $file\n";
92
93 # read in the database header
94 my ($temp, %dbf);
95
96 # read in information about dbt file
97 if (read (FOXBASEIN, $temp, 32) < 32) {
98 print STDERR "FOXPlug::read - eof while reading database header";
99 close (FOXBASEIN);
100 return undef;
101 }
102
103 # unpack the header
104 ($dbf{'hasdbt'},
105 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
106 $dbf{'numrecords'}, $dbf{'headerlen'},
107 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
108
109 # process hasdbt
110 if ($dbf{'hasdbt'} == 131) {
111 $dbf{'hasdbt'} = 1;
112 } elsif ($dbf{'hasdbt'} == 3) {
113 $dbf{'hasdbt'} = 0;
114 } else {
115 print STDERR "FOXPlug:read $fullname doesn't seem to be a Foxbase file\n";
116 return undef;
117 }
118
119 # read in the field description
120 $dbf{'numfields'} = 0;
121 $dbf{'fieldinfo'} = [];
122 while (read (FOXBASEIN, $temp, 1) > 0) {
123 last if ($temp eq "\x0d");
124 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
125
126 my %field = ();
127 $field{'name'} = $self->extracttext($temp, 11);
128 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
129 = unpack ("x11a1VCC", $temp);
130
131 push (@{$dbf{'fieldinfo'}}, \%field);
132
133 $dbf{'numfields'} ++;
134 }
135
136 # open the dbt file if we need to
137 $dbtfullname = $fullname;
138 if ($fullname =~ /f$/) {
139 $dbtfullname =~ s/f$/t/;
140 } else {
141 $dbtfullname =~ s/F$/T/;
142 }
143 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
144 print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
145 close (FOXBASEIN);
146 return undef;
147 }
148
149 # read in and process each record in the database
150 my $numrecords = 0;
151 while (($numrecords < $dbf{'numrecords'}) &&
152 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
153
154 # create a new record
155 my $record = [];
156
157 foreach $field (@{$dbf{'fieldinfo'}}) {
158 my $fieldvalue = "";
159
160 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
161 # a memo field, look up this field in the dbt file
162 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
163
164 $seekpos =~ s/^\s*//;
165 $seekpos = 0 unless $seekpos =~ /^\d+$/;
166
167 $seekpos = $seekpos * 512;
168
169 if ($seekpos == 0) {
170 # there is no memo field
171
172 } elsif (seek (DBTIN, $seekpos, 0)) {
173 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
174 last if ($fieldvalue =~ /\cZ/);
175 }
176
177 # remove everything after the control-Z
178 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
179
180 } else {
181 print STDERR "\nERROR - seek (to $seekpos) failed\n";
182 }
183
184 } else {
185 # a normal field
186 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
187 }
188
189 push (@$record, {%$field, 'value'=>$fieldvalue});
190 }
191
192 # process this record
193 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
194 $numrecords, $record);
195
196 # finished another record...
197 $numrecords++;
198 }
199
200 # close the dbt file if we need to
201 if ($dbf{'hasdbt'}) {
202 close (DBTIN);
203 }
204
205 # close the dbf file
206 close (FOXBASEIN);
207
208 # finished processing
209 return 1;
210}
211
212
213# will extract a string from some larger string, making it
214# conform to a number of constraints
215sub extracttext {
216 my $self = shift (@_);
217 my ($text, $maxlen, $offset, $stopstr) = @_;
218 $offset = 0 unless defined $offset;
219 $stopstr = "\x00" unless defined $stopstr;
220
221 # decide where the string finishes
222 my $end = index ($text, $stopstr, $offset);
223 $end = length ($text) if $end < 0;
224 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
225
226 return "" if ($end <= $offset);
227 return substr ($text, $offset, $end-$offset);
228}
229
230
231# process_record should be overriden for a particular type
232# of database. This default version outputs an html document
233# containing all the fields in the record as a table.
234# It also assumes that the text is in utf-8.
235sub process_record {
236 my $self = shift (@_);
237 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
238
239 # create a new document
240 my $doc_obj = new doc ($file, "indexed_doc");
241 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
242 my $section = $doc_obj->get_top_section();
243
244 # start of document
245 $doc_obj->add_utf8_text($section, "<table>\n");
246
247 # add each field
248 foreach $field (@$record) {
249 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
250 $doc_obj->add_utf8_text($section, " <tr>\n");
251 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
252 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
253 $doc_obj->add_utf8_text($section, " </tr>\n");
254 }
255 }
256
257 # end of document
258 $doc_obj->add_utf8_text($section, "</table>\n");
259
260 # add an object id
261 $doc_obj->set_OID();
262
263 # process the document
264 $processor->process($doc_obj);
265}
266
267
2681;
Note: See TracBrowser for help on using the repository browser.