source: main/tags/2.41/gsdl/perllib/plugins/FOXPlug.pm@ 29031

Last change on this file since 29031 was 5924, checked in by kjdon, 21 years ago

changed the new metadata to eg WordPlug instead of Word, cos a clash with Image

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.5 KB
Line 
1###########################################################################
2#
3# FOXPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31# 12/05/02 Added usage datastructure - John Thompson
32
33package FOXPlug;
34
35use BasPlug;
36use util;
37use doc;
38use unicode;
39use cnseg;
40# use gb;
41
42
43sub BEGIN {
44 @ISA = ('BasPlug');
45}
46
47my $options = { 'name' => "FOXPlug",
48 'desc' => "{FOXPlug.desc}",
49 'inherits' => "yes" };
50
51sub new {
52 my ($class) = @_;
53 $self = new BasPlug ();
54 $self->{'plugin_type'} = "FOXPlug";
55 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
56 my $option_list = $self->{'option_list'};
57 push( @{$option_list}, $options );
58
59 return bless $self, $class;
60}
61
62sub is_recursive {
63 my $self = shift (@_);
64
65 return 0; # this is not a recursive plugin
66}
67
68
69# return number of files processed, undef if can't process
70# Note that $base_dir might be "" and that $file might
71# include directories
72sub read {
73 my $self = shift (@_);
74 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
75 my $fullname = &util::filename_cat ($base_dir, $file);
76
77 # dbt files are processed at the same time as dbf files
78 return 0 if ($fullname =~ /\.dbt$/i);
79
80 # see if this is a foxbase database
81 return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);
82
83 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
84
85 # open the file
86 if (!open (FOXBASEIN, $fullname)) {
87 print STDERR "FOXPlug::read - couldn't read $fullname\n";
88 return undef;
89 }
90
91 print STDERR "FOXPlug: processing $file\n";
92
93 # read in the database header
94 my ($temp, %dbf);
95
96 # read in information about dbt file
97 if (read (FOXBASEIN, $temp, 32) < 32) {
98 print STDERR "FOXPlug::read - eof while reading database header";
99 close (FOXBASEIN);
100 return undef;
101 }
102
103 # unpack the header
104 ($dbf{'hasdbt'},
105 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
106 $dbf{'numrecords'}, $dbf{'headerlen'},
107 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
108
109 # process hasdbt
110 if ($dbf{'hasdbt'} == 131) {
111 $dbf{'hasdbt'} = 1;
112 } elsif ($dbf{'hasdbt'} == 3) {
113 $dbf{'hasdbt'} = 0;
114 } else {
115 print STDERR "FOXPlug:read $fullname doesn't seem to be a Foxbase file\n";
116 return undef;
117 }
118
119 # read in the field description
120 $dbf{'numfields'} = 0;
121 $dbf{'fieldinfo'} = [];
122 while (read (FOXBASEIN, $temp, 1) > 0) {
123 last if ($temp eq "\x0d");
124 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
125
126 my %field = ();
127 $field{'name'} = $self->extracttext($temp, 11);
128 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
129 = unpack ("x11a1VCC", $temp);
130
131 push (@{$dbf{'fieldinfo'}}, \%field);
132
133 $dbf{'numfields'} ++;
134 }
135
136 # open the dbt file if we need to
137 $dbtfullname = $fullname;
138 if ($fullname =~ /f$/) {
139 $dbtfullname =~ s/f$/t/;
140 } else {
141 $dbtfullname =~ s/F$/T/;
142 }
143 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
144 print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
145 close (FOXBASEIN);
146 return undef;
147 }
148
149 # read in and process each record in the database
150 my $numrecords = 0;
151 while (($numrecords < $dbf{'numrecords'}) &&
152 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
153
154 # create a new record
155 my $record = [];
156
157 foreach $field (@{$dbf{'fieldinfo'}}) {
158 my $fieldvalue = "";
159
160 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
161 # a memo field, look up this field in the dbt file
162 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
163
164 $seekpos =~ s/^\s*//;
165 $seekpos = 0 unless $seekpos =~ /^\d+$/;
166
167 $seekpos = $seekpos * 512;
168
169 if ($seekpos == 0) {
170 # there is no memo field
171
172 } elsif (seek (DBTIN, $seekpos, 0)) {
173 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
174 last if ($fieldvalue =~ /\cZ/);
175 }
176
177 # remove everything after the control-Z
178 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
179
180 } else {
181 print STDERR "\nERROR - seek (to $seekpos) failed\n";
182 }
183
184 } else {
185 # a normal field
186 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
187 }
188
189 push (@$record, {%$field, 'value'=>$fieldvalue});
190 }
191
192 # process this record
193 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
194 $numrecords, $record);
195
196 # finished another record...
197 $numrecords++;
198 }
199
200 # close the dbt file if we need to
201 if ($dbf{'hasdbt'}) {
202 close (DBTIN);
203 }
204
205 # close the dbf file
206 close (FOXBASEIN);
207
208 # finished processing
209 return 1;
210}
211
212
213# will extract a string from some larger string, making it
214# conform to a number of constraints
215sub extracttext {
216 my $self = shift (@_);
217 my ($text, $maxlen, $offset, $stopstr) = @_;
218 $offset = 0 unless defined $offset;
219 $stopstr = "\x00" unless defined $stopstr;
220
221 # decide where the string finishes
222 my $end = index ($text, $stopstr, $offset);
223 $end = length ($text) if $end < 0;
224 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
225
226 return "" if ($end <= $offset);
227 return substr ($text, $offset, $end-$offset);
228}
229
230
231# process_record should be overriden for a particular type
232# of database. This default version outputs an html document
233# containing all the fields in the record as a table.
234# It also assumes that the text is in utf-8.
235sub process_record {
236 my $self = shift (@_);
237 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
238
239 # create a new document
240 my $doc_obj = new doc ($file, "indexed_doc");
241 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
242 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "$self->{'plugin_type'}", "1");
243
244 my $section = $doc_obj->get_top_section();
245
246 # start of document
247 $doc_obj->add_utf8_text($section, "<table>\n");
248
249 # add each field
250 foreach $field (@$record) {
251 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
252 $doc_obj->add_utf8_text($section, " <tr>\n");
253 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
254 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
255 $doc_obj->add_utf8_text($section, " </tr>\n");
256 }
257 }
258
259 # end of document
260 $doc_obj->add_utf8_text($section, "</table>\n");
261
262 # add an object id
263 $doc_obj->set_OID();
264
265 # process the document
266 $processor->process($doc_obj);
267}
268
269
2701;
Note: See TracBrowser for help on using the repository browser.