source: trunk/gsdl/perllib/plugins/FOXPlug.pm@ 965

Last change on this file since 965 was 537, checked in by sjboddie, 25 years ago

added GPL headers

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.0 KB
Line 
1###########################################################################
2#
3# FOXPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31package FOXPlug;
32
33use BasPlug;
34use util;
35use doc;
36use unicode;
37use cnseg;
38use gb;
39
40
41sub BEGIN {
42 @ISA = ('BasPlug');
43}
44
45sub new {
46 my ($class) = @_;
47 $self = new BasPlug ();
48
49 return bless $self, $class;
50}
51
52sub is_recursive {
53 my $self = shift (@_);
54
55 return 0; # this is not a recursive plugin
56}
57
58
59# return number of files processed, undef if can't process
60# Note that $base_dir might be "" and that $file might
61# include directories
62sub read {
63 my $self = shift (@_);
64 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
65 my $fullname = &util::filename_cat ($base_dir, $file);
66
67 # dbt files are processed at the same time as dbf files
68 return 0 if ($fullname =~ /\.dbt$/i);
69
70 # see if this is a foxbase database
71 return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);
72
73 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
74
75 # open the file
76 if (!open (FOXBASEIN, $fullname)) {
77 print STDERR "FOXPlug::read - couldn't read $fullname\n";
78 return undef;
79 }
80
81 print STDERR "FOXPlug: processing $file\n";
82
83 # read in the database header
84 my ($temp, %dbf);
85
86 # read in information about dbt file
87 if (read (FOXBASEIN, $temp, 32) < 32) {
88 print STDERR "FOXPlug::read - eof while reading database header";
89 close (FOXBASEIN);
90 return undef;
91 }
92
93 # unpack the header
94 ($dbf{'hasdbt'},
95 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
96 $dbf{'numrecords'}, $dbf{'headerlen'},
97 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
98
99 # process hasdbt
100 if ($dbf{'hasdbt'} == 131) {
101 $dbf{'hasdbt'} = 1;
102 } elsif ($dbf{'hasdbt'} == 3) {
103 $dbf{'hasdbt'} = 0;
104 } else {
105 print STDERR "FOXPlug:read $fullname doesn't seem to be a Foxbase file\n";
106 return undef;
107 }
108
109 # read in the field description
110 $dbf{'numfields'} = 0;
111 $dbf{'fieldinfo'} = [];
112 while (read (FOXBASEIN, $temp, 1) > 0) {
113 last if ($temp eq "\x0d");
114 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
115
116 my %field = ();
117 $field{'name'} = $self->extracttext($temp, 11);
118 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
119 = unpack ("x11a1VCC", $temp);
120
121 push (@{$dbf{'fieldinfo'}}, \%field);
122
123 $dbf{'numfields'} ++;
124 }
125
126 # open the dbt file if we need to
127 $dbtfullname = $fullname;
128 if ($fullname =~ /f$/) {
129 $dbtfullname =~ s/f$/t/;
130 } else {
131 $dbtfullname =~ s/F$/T/;
132 }
133 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
134 print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
135 close (FOXBASEIN);
136 return undef;
137 }
138
139 # read in and process each record in the database
140 my $numrecords = 0;
141 while (($numrecords < $dbf{'numrecords'}) &&
142 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
143
144 # create a new record
145 my $record = [];
146
147 foreach $field (@{$dbf{'fieldinfo'}}) {
148 my $fieldvalue = "";
149
150 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
151 # a memo field, look up this field in the dbt file
152 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
153
154 $seekpos =~ s/^\s*//;
155 $seekpos = 0 unless $seekpos =~ /^\d+$/;
156
157 $seekpos = $seekpos * 512;
158
159 if ($seekpos == 0) {
160 # there is no memo field
161
162 } elsif (seek (DBTIN, $seekpos, 0)) {
163 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
164 last if ($fieldvalue =~ /\cZ/);
165 }
166
167 # remove everything after the control-Z
168 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
169
170 } else {
171 print STDERR "\nERROR - seek (to $seekpos) failed\n";
172 }
173
174 } else {
175 # a normal field
176 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
177 }
178
179 push (@$record, {%$field, 'value'=>$fieldvalue});
180 }
181
182 # process this record
183 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
184 $numrecords, $record);
185
186 # finished another record...
187 $numrecords++;
188 }
189
190 # close the dbt file if we need to
191 if ($dbf{'hasdbt'}) {
192 close (DBTIN);
193 }
194
195 # close the dbf file
196 close (FOXBASEIN);
197
198 # finished processing
199 return 1;
200}
201
202
203# will extract a string from some larger string, making it
204# conform to a number of constraints
205sub extracttext {
206 my $self = shift (@_);
207 my ($text, $maxlen, $offset, $stopstr) = @_;
208 $offset = 0 unless defined $offset;
209 $stopstr = "\x00" unless defined $stopstr;
210
211 # decide where the string finishes
212 my $end = index ($text, $stopstr, $offset);
213 $end = length ($text) if $end < 0;
214 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
215
216 return "" if ($end <= $offset);
217 return substr ($text, $offset, $end-$offset);
218}
219
220
221# process_record should be overriden for a particular type
222# of database. This default version outputs an html document
223# containing all the fields in the record as a table.
224# It also assumes that the text is in utf-8.
225sub process_record {
226 my $self = shift (@_);
227 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
228
229 # create a new document
230 my $doc_obj = new doc ($file, "indexed_doc");
231 my $section = $doc_obj->get_top_section();
232
233 # start of document
234 $doc_obj->add_utf8_text($section, "<table>\n");
235
236 # add each field
237 foreach $field (@$record) {
238 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
239 $doc_obj->add_utf8_text($section, " <tr>\n");
240 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
241 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
242 $doc_obj->add_utf8_text($section, " </tr>\n");
243 }
244 }
245
246 # end of document
247 $doc_obj->add_utf8_text($section, "</table>\n");
248
249 # add an object id
250 $doc_obj->set_OID();
251
252 # process the document
253 $processor->process($doc_obj);
254}
255
256
2571;
Note: See TracBrowser for help on using the repository browser.