source: gsdl/trunk/perllib/plugins/FOXPlugin.pm@ 17026

Last change on this file since 17026 was 17026, checked in by kjdon, 16 years ago

OID generation modifications: OIDtype and OIDmetadata options now available for plugins as well as import. OIDtype for plugins defaults to auto - if set to auto, then use the values from import. All plugins now call self->add_OID instead of doc_obj->set_OID. This sets the doc_obj OIDtype so that doesn't need to be donein other places any more. all plugins have the get_oid_hash_type method - normally returns hash_on_file, but can be overridden to return hash_on_ga_xml for those plugins that don't want hashing on file (MP3,OggVorbis...)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1###########################################################################
2#
3# FOXPlugin.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin to process a Foxbase dbt file. This plugin provides the basic
27# functionality to read in the dbt and dbf files and process each record.
28# This general plugin should be overridden for a particular database to process
29# the appropriate fields in the file.
30
31package FOXPlugin;
32
33use BasePlugin;
34use util;
35use doc;
36use unicode;
37
38use strict;
39no strict 'refs'; # allow filehandles to be variables and viceversa
40
41
42sub BEGIN {
43 @FOXPlugin::ISA = ('BasePlugin');
44}
45
46my $arguments =
47 [ { 'name' => "process_exp",
48 'desc' => "{BasePlugin.process_exp}",
49 'type' => "regexp",
50 'reqd' => "no",
51 'deft' => &get_default_process_exp() },
52 { 'name' => "block_exp",
53 'desc' => "{BasePlugin.block_exp}",
54 'type' => "regexp",
55 'reqd' => "no",
56 'deft' => &get_default_block_exp() } ];
57
58my $options = { 'name' => "FOXPlugin",
59 'desc' => "{FOXPlugin.desc}",
60 'abstract' => "no",
61 'inherits' => "yes",
62 'args' => $arguments };
63
64sub new {
65 my ($class) = shift (@_);
66 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
67 push(@$pluginlist, $class);
68
69 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
70 push(@{$hashArgOptLists->{"OptList"}},$options);
71
72 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
73
74 return bless $self, $class;
75}
76
77sub get_default_process_exp {
78 my $self = shift (@_);
79
80 return q^(?i)\.dbf$^;
81}
82
83#dbt files are processed at the same time as dbf files
84sub get_default_block_exp {
85 my $self = shift (@_);
86
87 return q^(?i)\.dbt$^;
88}
89
90# return number of files processed, undef if can't process
91# Note that $base_dir might be "" and that $file might
92# include directories
93sub read {
94 my $self = shift (@_);
95 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
96
97 # can we process this file??
98 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
99 return undef unless $self->can_process_this_file($filename_full_path);
100
101 print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli);
102 print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1;
103
104 my ($parent_dir) = $filename_full_path =~ /^(.*)\/[^\/]+\.dbf$/i;
105
106 # open the file
107 if (!open (FOXBASEIN, $filename_full_path)) {
108 if ($gli) {
109 print STDERR "<ProcessingError n='$file' r='Could not read $filename_full_path'>\n";
110 }
111 print STDERR "FOXPlugin::read - couldn't read $filename_full_path\n";
112 return -1; # error in processing
113 }
114
115 # read in the database header
116 my ($temp, %dbf);
117
118 # read in information about dbt file
119 if (read (FOXBASEIN, $temp, 32) < 32) {
120 if ($gli) {
121 print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n";
122 }
123 print STDERR "FOXPlugin::read - eof while reading database header\n";
124 close (FOXBASEIN);
125 return -1;
126 }
127
128 # unpack the header
129 ($dbf{'hasdbt'},
130 $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
131 $dbf{'numrecords'}, $dbf{'headerlen'},
132 $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
133
134 # process hasdbt
135 if ($dbf{'hasdbt'} == 131) {
136 $dbf{'hasdbt'} = 1;
137 } elsif ($dbf{'hasdbt'} == 3 || $dbf{'hasdbt'} == 48) {
138 $dbf{'hasdbt'} = 0;
139 } else {
140 if ($gli) {
141 print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
142 }
143 print STDERR "FOXPlugin:read - $filename_full_path doesn't seem to be a Foxbase file\n";
144 return -1;
145 }
146
147 # read in the field description
148 $dbf{'numfields'} = 0;
149 $dbf{'fieldinfo'} = [];
150 while (read (FOXBASEIN, $temp, 1) > 0) {
151 last if ($temp eq "\x0d");
152 last if (read (FOXBASEIN, $temp, 31, 1) < 31);
153
154 my %field = ();
155 $field{'name'} = $self->extracttext($temp, 11);
156 ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'})
157 = unpack ("x11a1VCC", $temp);
158
159 push (@{$dbf{'fieldinfo'}}, \%field);
160
161 $dbf{'numfields'} ++;
162 }
163
164 # open the dbt file if we need to
165 my $dbtfullname = $filename_full_path;
166 if ($filename_full_path =~ /f$/) {
167 $dbtfullname =~ s/f$/t/;
168 } else {
169 $dbtfullname =~ s/F$/T/;
170 }
171 if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
172 if ($gli) {
173 print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n";
174 }
175 print STDERR "FOXPlugin::read - couldn't read $dbtfullname\n";
176 close (FOXBASEIN);
177 return -1;
178 }
179
180 # read in and process each record in the database
181 my $numrecords = 0;
182 while (($numrecords < $dbf{'numrecords'}) &&
183 (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {
184
185 # create a new record
186 my $record = [];
187
188 foreach my $field (@{$dbf{'fieldinfo'}}) {
189 my $fieldvalue = "";
190
191 if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
192 # a memo field, look up this field in the dbt file
193 my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});
194
195 $seekpos =~ s/^\s*//;
196 $seekpos = 0 unless $seekpos =~ /^\d+$/;
197
198 $seekpos = $seekpos * 512;
199
200 if ($seekpos == 0) {
201 # there is no memo field
202
203 } elsif (seek (DBTIN, $seekpos, 0)) {
204 while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
205 last if ($fieldvalue =~ /\cZ/);
206 }
207
208 # remove everything after the control-Z
209 substr($fieldvalue, index($fieldvalue, "\cZ")) = "";
210
211 } else {
212 print STDERR "\nERROR - seek (to $seekpos) failed\n";
213 }
214
215 } else {
216 # a normal field
217 $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
218 }
219
220 push (@$record, {%$field, 'value'=>$fieldvalue});
221 }
222
223 # process this record
224 $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor,
225 $numrecords, $record);
226
227 # finished another record...
228 $numrecords++;
229 }
230
231 # close the dbt file if we need to
232 if ($dbf{'hasdbt'}) {
233 close (DBTIN);
234 }
235
236 # close the dbf file
237 close (FOXBASEIN);
238
239 # finished processing
240 return 1;
241}
242
243
244# will extract a string from some larger string, making it
245# conform to a number of constraints
246sub extracttext {
247 my $self = shift (@_);
248 my ($text, $maxlen, $offset, $stopstr) = @_;
249 $offset = 0 unless defined $offset;
250 $stopstr = "\x00" unless defined $stopstr;
251
252 # decide where the string finishes
253 my $end = index ($text, $stopstr, $offset);
254 $end = length ($text) if $end < 0;
255 $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
256
257 return "" if ($end <= $offset);
258 return substr ($text, $offset, $end-$offset);
259}
260
261
262# process_record should be overriden for a particular type
263# of database. This default version outputs an html document
264# containing all the fields in the record as a table.
265# It also assumes that the text is in utf-8.
266sub process_record {
267 my $self = shift (@_);
268 my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;
269
270 # create a new document
271 my $doc_obj = new doc ($file, "indexed_doc");
272
273 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
274 my $section = $doc_obj->get_top_section();
275
276 $doc_obj->add_metadata($section, "FileFormat", "FOX");
277 $doc_obj->add_metadata($section, "FileSize", (-s $file));
278
279 # start of document
280 $doc_obj->add_utf8_text($section, "<table>\n");
281
282 # add each field
283 foreach my $field (@$record) {
284 if (defined ($field->{'name'}) && defined ($field->{'value'})) {
285 $doc_obj->add_utf8_text($section, " <tr>\n");
286 $doc_obj->add_utf8_text($section, " <td>$field->{'name'}</td>\n");
287 $doc_obj->add_utf8_text($section, " <td>$field->{'value'}</td>\n");
288 $doc_obj->add_utf8_text($section, " </tr>\n");
289 }
290 }
291
292 # end of document
293 $doc_obj->add_utf8_text($section, "</table>\n");
294
295 # add an object id
296 $self->add_OID($doc_obj);
297
298 # process the document
299 $processor->process($doc_obj);
300}
301
302
3031;
Note: See TracBrowser for help on using the repository browser.