source: gsdl/trunk/perllib/plugins/FOXPlug.pm@ 14661

Last change on this file since 14661 was 12270, checked in by kjdon, 18 years ago

set_OIDtype now takes two arguments, the type and the metadata (used if type=assigned)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1###########################################################################
2#
3# FOXPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31# 12/05/02 Added usage datastructure - John Thompson
32
33package FOXPlug;
34
35use BasPlug;
36use util;
37use doc;
38use unicode;
39use cnseg;
40# use gb;
41
42use strict;
43no strict 'refs'; # allow filehandles to be variables and viceversa
44
45
46sub BEGIN {
47 @FOXPlug::ISA = ('BasPlug');
48}
49
50my $arguments =
51 [ { 'name' => "process_exp",
52 'desc' => "{BasPlug.process_exp}",
53 'type' => "regexp",
54 'reqd' => "no",
55 'deft' => &get_default_process_exp() },
56 { 'name' => "block_exp",
57 'desc' => "{BasPlug.block_exp}",
58 'type' => "regexp",
59 'reqd' => "no",
60 'deft' => &get_default_block_exp() } ];
61
62my $options = { 'name' => "FOXPlug",
63 'desc' => "{FOXPlug.desc}",
64 'abstract' => "no",
65 'inherits' => "yes",
66 'args' => $arguments };
67
68sub new {
69 my ($class) = shift (@_);
70 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
71 push(@$pluginlist, $class);
72
73 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
74 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
75
76 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
77
78 return bless $self, $class;
79}
80
81sub get_default_process_exp {
82 my $self = shift (@_);
83
84 return q^(?i)\.dbf$^;
85}
86
87#dbt files are processed at the same time as dbf files
88sub get_default_block_exp {
89 my $self = shift (@_);
90
91 return q^(?i)\.dbt$^;
92}
93
94# return number of files processed, undef if can't process
95# Note that $base_dir might be "" and that $file might
96# include directories
97sub read {
98 my $self = shift (@_);
99 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
100
101 #check for associate_ext, blocking etc, are we processing this file?
102 my ($block_status,$fullname) = $self->read_block(@_);
103 return $block_status if ((!defined $block_status) || ($block_status==0));
104
105 print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli);
106 print STDERR "FOXPlug: processing $file\n" if $self->{'verbosity'} > 1;
107
108 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
109
110 # open the file
111 if (!open (FOXBASEIN, $fullname)) {
112 if ($gli) {
113 print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n";
114 }
115 print STDERR "FOXPlug::read - couldn't read $fullname\n";
116 return -1; # error in processing
117 }
118
119 # read in the database header
120 my ($temp, %dbf);
121
122 # read in information about dbt file
123 if (read (FOXBASEIN, $temp, 32) < 32) {
124 if ($gli) {
125 print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n";
126 }
127 print STDERR "FOXPlug::read - eof while reading database header\n";
128 close (FOXBASEIN);
129 return -1;
130 }
131
132 # unpack the header
133 ($dbf{'hasdbt'},
134 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
135 $dbf{'numrecords'}, $dbf{'headerlen'},
136 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
137
138 # process hasdbt
139 if ($dbf{'hasdbt'} == 131) {
140 $dbf{'hasdbt'} = 1;
141 } elsif ($dbf{'hasdbt'} == 3 || $dbf{'hasdbt'} == 48) {
142 $dbf{'hasdbt'} = 0;
143 } else {
144 if ($gli) {
145 print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
146 }
147 print STDERR "FOXPlug:read - $fullname doesn't seem to be a Foxbase file\n";
148 return -1;
149 }
150
151 # read in the field description
152 $dbf{'numfields'} = 0;
153 $dbf{'fieldinfo'} = [];
154 while (read (FOXBASEIN, $temp, 1) > 0) {
155 last if ($temp eq "\x0d");
156 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
157
158 my %field = ();
159 $field{'name'} = $self->extracttext($temp, 11);
160 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
161 = unpack ("x11a1VCC", $temp);
162
163 push (@{$dbf{'fieldinfo'}}, \%field);
164
165 $dbf{'numfields'} ++;
166 }
167
168 # open the dbt file if we need to
169 my $dbtfullname = $fullname;
170 if ($fullname =~ /f$/) {
171 $dbtfullname =~ s/f$/t/;
172 } else {
173 $dbtfullname =~ s/F$/T/;
174 }
175 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
176 if ($gli) {
177 print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n";
178 }
179 print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
180 close (FOXBASEIN);
181 return -1;
182 }
183
184 # read in and process each record in the database
185 my $numrecords = 0;
186 while (($numrecords < $dbf{'numrecords'}) &&
187 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
188
189 # create a new record
190 my $record = [];
191
192 foreach my $field (@{$dbf{'fieldinfo'}}) {
193 my $fieldvalue = "";
194
195 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
196 # a memo field, look up this field in the dbt file
197 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
198
199 $seekpos =~ s/^\s*//;
200 $seekpos = 0 unless $seekpos =~ /^\d+$/;
201
202 $seekpos = $seekpos * 512;
203
204 if ($seekpos == 0) {
205 # there is no memo field
206
207 } elsif (seek (DBTIN, $seekpos, 0)) {
208 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
209 last if ($fieldvalue =~ /\cZ/);
210 }
211
212 # remove everything after the control-Z
213 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
214
215 } else {
216 print STDERR "\nERROR - seek (to $seekpos) failed\n";
217 }
218
219 } else {
220 # a normal field
221 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
222 }
223
224 push (@$record, {%$field, 'value'=>$fieldvalue});
225 }
226
227 # process this record
228 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
229 $numrecords, $record);
230
231 # finished another record...
232 $numrecords++;
233 }
234
235 # close the dbt file if we need to
236 if ($dbf{'hasdbt'}) {
237 close (DBTIN);
238 }
239
240 # close the dbf file
241 close (FOXBASEIN);
242
243 # finished processing
244 return 1;
245}
246
247
248# will extract a string from some larger string, making it
249# conform to a number of constraints
250sub extracttext {
251 my $self = shift (@_);
252 my ($text, $maxlen, $offset, $stopstr) = @_;
253 $offset = 0 unless defined $offset;
254 $stopstr = "\x00" unless defined $stopstr;
255
256 # decide where the string finishes
257 my $end = index ($text, $stopstr, $offset);
258 $end = length ($text) if $end < 0;
259 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
260
261 return "" if ($end <= $offset);
262 return substr ($text, $offset, $end-$offset);
263}
264
265
266# process_record should be overriden for a particular type
267# of database. This default version outputs an html document
268# containing all the fields in the record as a table.
269# It also assumes that the text is in utf-8.
270sub process_record {
271 my $self = shift (@_);
272 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
273
274 # create a new document
275 my $doc_obj = new doc ($file, "indexed_doc");
276 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
277 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
278 my $section = $doc_obj->get_top_section();
279
280 $doc_obj->add_metadata($section, "FileFormat", "FOX");
281 $doc_obj->add_metadata($section, "FileSize", (-s $file));
282
283 # start of document
284 $doc_obj->add_utf8_text($section, "<table>\n");
285
286 # add each field
287 foreach my $field (@$record) {
288 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
289 $doc_obj->add_utf8_text($section, " <tr>\n");
290 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
291 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
292 $doc_obj->add_utf8_text($section, " </tr>\n");
293 }
294 }
295
296 # end of document
297 $doc_obj->add_utf8_text($section, "</table>\n");
298
299 # add an object id
300 $doc_obj->set_OID();
301
302 # process the document
303 $processor->process($doc_obj);
304}
305
306
3071;
Note: See TracBrowser for help on using the repository browser.