source: trunk/gsdl/perllib/plugins/FOXPlug.pm@ 10606

Last change on this file since 10606 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1###########################################################################
2#
3# FOXPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31# 12/05/02 Added usage datastructure - John Thompson
32
33package FOXPlug;
34
35use BasPlug;
36use util;
37use doc;
38use unicode;
39use cnseg;
40# use gb;
41
42use strict;
43no strict 'refs'; # allow filehandles to be variables and viceversa
44
45
46sub BEGIN {
47 @FOXPlug::ISA = ('BasPlug');
48}
49
50my $arguments = [
51 ];
52
53my $options = { 'name' => "FOXPlug",
54 'desc' => "{FOXPlug.desc}",
55 'abstract' => "no",
56 'inherits' => "yes" };
57
58sub new {
59 my ($class) = shift (@_);
60 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
61 push(@$pluginlist, $class);
62
63 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
64 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
65
66 my $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs);
67
68 return bless $self, $class;
69}
70
71sub is_recursive {
72 my $self = shift (@_);
73
74 return 0; # this is not a recursive plugin
75}
76
77
78# return number of files processed, undef if can't process
79# Note that $base_dir might be "" and that $file might
80# include directories
81sub read {
82 my $self = shift (@_);
83 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
84 my $fullname = &util::filename_cat ($base_dir, $file);
85
86 # dbt files are processed at the same time as dbf files
87 return 0 if ($fullname =~ /\.dbt$/i);
88
89 # see if this is a foxbase database
90 return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);
91
92 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
93
94 # open the file
95 if (!open (FOXBASEIN, $fullname)) {
96 if ($gli) {
97 print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n";
98 }
99 print STDERR "FOXPlug::read - couldn't read $fullname\n";
100 return -1; # error in processing
101 }
102
103 print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli);
104
105 print STDERR "FOXPlug: processing $file\n";
106
107 # read in the database header
108 my ($temp, %dbf);
109
110 # read in information about dbt file
111 if (read (FOXBASEIN, $temp, 32) < 32) {
112 if ($gli) {
113 print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n";
114 }
115 print STDERR "FOXPlug::read - eof while reading database header\n";
116 close (FOXBASEIN);
117 return -1;
118 }
119
120 # unpack the header
121 ($dbf{'hasdbt'},
122 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
123 $dbf{'numrecords'}, $dbf{'headerlen'},
124 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
125
126 # process hasdbt
127 if ($dbf{'hasdbt'} == 131) {
128 $dbf{'hasdbt'} = 1;
129 } elsif ($dbf{'hasdbt'} == 3 || $dbf{'hasdbt'} == 48) {
130 $dbf{'hasdbt'} = 0;
131 } else {
132 if ($gli) {
133 print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
134 }
135 print STDERR "FOXPlug:read - $fullname doesn't seem to be a Foxbase file\n";
136 return -1;
137 }
138
139 # read in the field description
140 $dbf{'numfields'} = 0;
141 $dbf{'fieldinfo'} = [];
142 while (read (FOXBASEIN, $temp, 1) > 0) {
143 last if ($temp eq "\x0d");
144 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
145
146 my %field = ();
147 $field{'name'} = $self->extracttext($temp, 11);
148 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
149 = unpack ("x11a1VCC", $temp);
150
151 push (@{$dbf{'fieldinfo'}}, \%field);
152
153 $dbf{'numfields'} ++;
154 }
155
156 # open the dbt file if we need to
157 my $dbtfullname = $fullname;
158 if ($fullname =~ /f$/) {
159 $dbtfullname =~ s/f$/t/;
160 } else {
161 $dbtfullname =~ s/F$/T/;
162 }
163 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
164 if ($gli) {
165 print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n";
166 }
167 print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
168 close (FOXBASEIN);
169 return -1;
170 }
171
172 # read in and process each record in the database
173 my $numrecords = 0;
174 while (($numrecords < $dbf{'numrecords'}) &&
175 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
176
177 # create a new record
178 my $record = [];
179
180 foreach my $field (@{$dbf{'fieldinfo'}}) {
181 my $fieldvalue = "";
182
183 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
184 # a memo field, look up this field in the dbt file
185 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
186
187 $seekpos =~ s/^\s*//;
188 $seekpos = 0 unless $seekpos =~ /^\d+$/;
189
190 $seekpos = $seekpos * 512;
191
192 if ($seekpos == 0) {
193 # there is no memo field
194
195 } elsif (seek (DBTIN, $seekpos, 0)) {
196 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
197 last if ($fieldvalue =~ /\cZ/);
198 }
199
200 # remove everything after the control-Z
201 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
202
203 } else {
204 print STDERR "\nERROR - seek (to $seekpos) failed\n";
205 }
206
207 } else {
208 # a normal field
209 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
210 }
211
212 push (@$record, {%$field, 'value'=>$fieldvalue});
213 }
214
215 # process this record
216 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
217 $numrecords, $record);
218
219 # finished another record...
220 $numrecords++;
221 }
222
223 # close the dbt file if we need to
224 if ($dbf{'hasdbt'}) {
225 close (DBTIN);
226 }
227
228 # close the dbf file
229 close (FOXBASEIN);
230
231 # finished processing
232 return 1;
233}
234
235
236# will extract a string from some larger string, making it
237# conform to a number of constraints
238sub extracttext {
239 my $self = shift (@_);
240 my ($text, $maxlen, $offset, $stopstr) = @_;
241 $offset = 0 unless defined $offset;
242 $stopstr = "\x00" unless defined $stopstr;
243
244 # decide where the string finishes
245 my $end = index ($text, $stopstr, $offset);
246 $end = length ($text) if $end < 0;
247 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
248
249 return "" if ($end <= $offset);
250 return substr ($text, $offset, $end-$offset);
251}
252
253
254# process_record should be overriden for a particular type
255# of database. This default version outputs an html document
256# containing all the fields in the record as a table.
257# It also assumes that the text is in utf-8.
258sub process_record {
259 my $self = shift (@_);
260 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
261
262 # create a new document
263 my $doc_obj = new doc ($file, "indexed_doc");
264 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
265 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
266 my $section = $doc_obj->get_top_section();
267
268 $doc_obj->add_metadata($section, "FileFormat", "FOX");
269 $doc_obj->add_metadata($section, "FileSize", (-s $file));
270
271 # start of document
272 $doc_obj->add_utf8_text($section, "<table>\n");
273
274 # add each field
275 foreach my $field (@$record) {
276 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
277 $doc_obj->add_utf8_text($section, " <tr>\n");
278 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
279 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
280 $doc_obj->add_utf8_text($section, " </tr>\n");
281 }
282 }
283
284 # end of document
285 $doc_obj->add_utf8_text($section, "</table>\n");
286
287 # add an object id
288 $doc_obj->set_OID();
289
290 # process the document
291 $processor->process($doc_obj);
292}
293
294
2951;
Note: See TracBrowser for help on using the repository browser.