source: tags/gsdl-2_51-distribution/gsdl/perllib/plugins/FOXPlug.pm@ 7622

Last change on this file since 7622 was 7508, checked in by kjdon, 20 years ago

changed the plugin metadata - instead of having eg HTMLPlug metadata set to 1, now we have Plugin metadata set to HTMLPlug

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.6 KB
Line 
1###########################################################################
2#
3# FOXPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31# 12/05/02 Added usage datastructure - John Thompson
32
33package FOXPlug;
34
35use BasPlug;
36use util;
37use doc;
38use unicode;
39use cnseg;
40# use gb;
41
42
43sub BEGIN {
44 @ISA = ('BasPlug');
45}
46
47my $options = { 'name' => "FOXPlug",
48 'desc' => "{FOXPlug.desc}",
49 'abstract' => "no",
50 'inherits' => "yes" };
51
52sub new {
53 my ($class) = @_;
54 $self = new BasPlug ();
55 $self->{'plugin_type'} = "FOXPlug";
56 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
57 my $option_list = $self->{'option_list'};
58 push( @{$option_list}, $options );
59
60 return bless $self, $class;
61}
62
63sub is_recursive {
64 my $self = shift (@_);
65
66 return 0; # this is not a recursive plugin
67}
68
69
70# return number of files processed, undef if can't process
71# Note that $base_dir might be "" and that $file might
72# include directories
73sub read {
74 my $self = shift (@_);
75 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
76 my $fullname = &util::filename_cat ($base_dir, $file);
77
78 # dbt files are processed at the same time as dbf files
79 return 0 if ($fullname =~ /\.dbt$/i);
80
81 # see if this is a foxbase database
82 return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);
83
84 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
85
86 # open the file
87 if (!open (FOXBASEIN, $fullname)) {
88 print STDERR "FOXPlug::read - couldn't read $fullname\n";
89 return -1; # error in processing
90 }
91
92 print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli);
93
94 print STDERR "FOXPlug: processing $file\n";
95
96 # read in the database header
97 my ($temp, %dbf);
98
99 # read in information about dbt file
100 if (read (FOXBASEIN, $temp, 32) < 32) {
101 print STDERR "FOXPlug::read - eof while reading database header";
102 close (FOXBASEIN);
103 return -1;
104 }
105
106 # unpack the header
107 ($dbf{'hasdbt'},
108 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
109 $dbf{'numrecords'}, $dbf{'headerlen'},
110 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
111
112 # process hasdbt
113 if ($dbf{'hasdbt'} == 131) {
114 $dbf{'hasdbt'} = 1;
115 } elsif ($dbf{'hasdbt'} == 3 || $dbf{'hasdbt'} == 48) {
116 $dbf{'hasdbt'} = 0;
117 } else {
118 print STDERR "FOXPlug:read $fullname doesn't seem to be a Foxbase file\n";
119 return -1;
120 }
121
122 # read in the field description
123 $dbf{'numfields'} = 0;
124 $dbf{'fieldinfo'} = [];
125 while (read (FOXBASEIN, $temp, 1) > 0) {
126 last if ($temp eq "\x0d");
127 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
128
129 my %field = ();
130 $field{'name'} = $self->extracttext($temp, 11);
131 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
132 = unpack ("x11a1VCC", $temp);
133
134 push (@{$dbf{'fieldinfo'}}, \%field);
135
136 $dbf{'numfields'} ++;
137 }
138
139 # open the dbt file if we need to
140 $dbtfullname = $fullname;
141 if ($fullname =~ /f$/) {
142 $dbtfullname =~ s/f$/t/;
143 } else {
144 $dbtfullname =~ s/F$/T/;
145 }
146 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
147 print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
148 close (FOXBASEIN);
149 return -1;
150 }
151
152 # read in and process each record in the database
153 my $numrecords = 0;
154 while (($numrecords < $dbf{'numrecords'}) &&
155 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
156
157 # create a new record
158 my $record = [];
159
160 foreach $field (@{$dbf{'fieldinfo'}}) {
161 my $fieldvalue = "";
162
163 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
164 # a memo field, look up this field in the dbt file
165 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
166
167 $seekpos =~ s/^\s*//;
168 $seekpos = 0 unless $seekpos =~ /^\d+$/;
169
170 $seekpos = $seekpos * 512;
171
172 if ($seekpos == 0) {
173 # there is no memo field
174
175 } elsif (seek (DBTIN, $seekpos, 0)) {
176 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
177 last if ($fieldvalue =~ /\cZ/);
178 }
179
180 # remove everything after the control-Z
181 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
182
183 } else {
184 print STDERR "\nERROR - seek (to $seekpos) failed\n";
185 }
186
187 } else {
188 # a normal field
189 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
190 }
191
192 push (@$record, {%$field, 'value'=>$fieldvalue});
193 }
194
195 # process this record
196 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
197 $numrecords, $record);
198
199 # finished another record...
200 $numrecords++;
201 }
202
203 # close the dbt file if we need to
204 if ($dbf{'hasdbt'}) {
205 close (DBTIN);
206 }
207
208 # close the dbf file
209 close (FOXBASEIN);
210
211 # finished processing
212 return 1;
213}
214
215
216# will extract a string from some larger string, making it
217# conform to a number of constraints
218sub extracttext {
219 my $self = shift (@_);
220 my ($text, $maxlen, $offset, $stopstr) = @_;
221 $offset = 0 unless defined $offset;
222 $stopstr = "\x00" unless defined $stopstr;
223
224 # decide where the string finishes
225 my $end = index ($text, $stopstr, $offset);
226 $end = length ($text) if $end < 0;
227 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
228
229 return "" if ($end <= $offset);
230 return substr ($text, $offset, $end-$offset);
231}
232
233
234# process_record should be overriden for a particular type
235# of database. This default version outputs an html document
236# containing all the fields in the record as a table.
237# It also assumes that the text is in utf-8.
238sub process_record {
239 my $self = shift (@_);
240 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
241
242 # create a new document
243 my $doc_obj = new doc ($file, "indexed_doc");
244 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
245 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
246
247 my $section = $doc_obj->get_top_section();
248
249 # start of document
250 $doc_obj->add_utf8_text($section, "<table>\n");
251
252 # add each field
253 foreach $field (@$record) {
254 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
255 $doc_obj->add_utf8_text($section, " <tr>\n");
256 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
257 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
258 $doc_obj->add_utf8_text($section, " </tr>\n");
259 }
260 }
261
262 # end of document
263 $doc_obj->add_utf8_text($section, "</table>\n");
264
265 # add an object id
266 $doc_obj->set_OID();
267
268 # process the document
269 $processor->process($doc_obj);
270}
271
272
2731;
Note: See TracBrowser for help on using the repository browser.