########################################################################### # # FOXPlug.pm # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### # plugin to process a Foxbase dbt file. This plugin provides the basic # functionality to read in the dbt and dbf files and process each record. # This general plugin should be overridden for a particular database to process # the appropriate fields in the file. package FOXPlug; use BasPlug; use util; use doc; use unicode; use cnseg; use gb; sub BEGIN { @ISA = ('BasPlug'); } sub new { my ($class) = @_; $self = new BasPlug (); return bless $self, $class; } sub is_recursive { my $self = shift (@_); return 0; # this is not a recursive plugin } # return number of files processed, undef if can't process # Note that $base_dir might be "" and that $file might # include directories sub read { my $self = shift (@_); my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_; my $fullname = &util::filename_cat ($base_dir, $file); # dbt files are processed at the same time as dbf files return 0 if ($fullname =~ /\.dbt$/i); # see if this is a foxbase database return undef unless (-f $fullname && $fullname =~ /\.dbf$/i); my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i; # open the file if (!open (FOXBASEIN, $fullname)) { print STDERR "FOXPlug::read - couldn't read $fullname\n"; return undef; } print STDERR "FOXPlug: processing $file\n"; # read in the database header my ($temp, %dbf); # read in information about dbt file if (read (FOXBASEIN, $temp, 32) < 32) { print STDERR "FOXPlug::read - eof while reading database header"; close (FOXBASEIN); return undef; } # unpack the header ($dbf{'hasdbt'}, $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'}, $dbf{'numrecords'}, $dbf{'headerlen'}, $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp); # process hasdbt if ($dbf{'hasdbt'} == 131) { $dbf{'hasdbt'} = 1; } elsif ($dbf{'hasdbt'} == 3) { $dbf{'hasdbt'} = 0; } else { print STDERR "FOXPlug:read $fullname doesn't seem to be a Foxbase file\n"; return undef; } # read in the field description $dbf{'numfields'} = 0; $dbf{'fieldinfo'} = []; while (read (FOXBASEIN, $temp, 1) > 0) { last if ($temp eq "\x0d"); last if (read (FOXBASEIN, $temp, 31, 1) < 31); my %field = (); $field{'name'} = $self->extracttext($temp, 11); ($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'}) = unpack ("x11a1VCC", $temp); push (@{$dbf{'fieldinfo'}}, \%field); $dbf{'numfields'} ++; } # open the dbt file if we need to $dbtfullname = $fullname; if ($fullname =~ /f$/) { $dbtfullname =~ s/f$/t/; } else { $dbtfullname =~ s/F$/T/; } if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) { print STDERR "FOXPlug::read - couldn't read $dbtfullname\n"; close (FOXBASEIN); return undef; } # read in and process each record in the database my $numrecords = 0; while (($numrecords < $dbf{'numrecords'}) && (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) { # create a new record my $record = []; foreach $field (@{$dbf{'fieldinfo'}}) { my $fieldvalue = ""; if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) { # a memo field, look up this field in the dbt file my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'}); $seekpos =~ s/^\s*//; $seekpos = 0 unless $seekpos =~ /^\d+$/; $seekpos = $seekpos * 512; if ($seekpos == 0) { # there is no memo field } elsif (seek (DBTIN, $seekpos, 0)) { while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) { last if ($fieldvalue =~ /\cZ/); } # remove everything after the control-Z substr($fieldvalue, index($fieldvalue, "\cZ")) = ""; } else { print STDERR "\nERROR - seek (to $seekpos) failed\n"; } } else { # a normal field $fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'}); } push (@$record, {%$field, 'value'=>$fieldvalue}); } # process this record $self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record); # finished another record... $numrecords++; } # close the dbt file if we need to if ($dbf{'hasdbt'}) { close (DBTIN); } # close the dbf file close (FOXBASEIN); # finished processing return 1; } # will extract a string from some larger string, making it # conform to a number of constraints sub extracttext { my $self = shift (@_); my ($text, $maxlen, $offset, $stopstr) = @_; $offset = 0 unless defined $offset; $stopstr = "\x00" unless defined $stopstr; # decide where the string finishes my $end = index ($text, $stopstr, $offset); $end = length ($text) if $end < 0; $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen); return "" if ($end <= $offset); return substr ($text, $offset, $end-$offset); } # process_record should be overriden for a particular type # of database. This default version outputs an html document # containing all the fields in the record as a table. # It also assumes that the text is in utf-8. sub process_record { my $self = shift (@_); my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_; # create a new document my $doc_obj = new doc ($file, "indexed_doc"); my $section = $doc_obj->get_top_section(); # start of document $doc_obj->add_utf8_text($section, "\n"); # add each field foreach $field (@$record) { if (defined ($field->{'name'}) && defined ($field->{'value'})) { $doc_obj->add_utf8_text($section, " \n"); $doc_obj->add_utf8_text($section, " \n"); $doc_obj->add_utf8_text($section, " \n"); $doc_obj->add_utf8_text($section, " \n"); } } # end of document $doc_obj->add_utf8_text($section, "
$field->{'name'}$field->{'value'}
\n"); # add an object id $doc_obj->set_OID(); # process the document $processor->process($doc_obj); } 1;