###########################################################################
#
# FOXPlug.pm
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# plugin to process a Foxbase dbt file. This plugin provides the basic
# functionality to read in the dbt and dbf files and process each record.
# This general plugin should be overridden for a particular database to process
# the appropriate fields in the file.

package FOXPlug;

use BasPlug;
use util;
use doc;
use unicode;
use cnseg;
use gb;


sub BEGIN {
    @ISA = ('BasPlug');
}

sub new {
    my ($class) = @_;
    $self = new BasPlug ();

    return bless $self, $class;
}

sub is_recursive {
    my $self = shift (@_);

    return 0; # this is not a recursive plugin
}


# return number of files processed, undef if can't process
# Note that $base_dir might be "" and that $file might 
# include directories
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
    my $fullname = &util::filename_cat ($base_dir, $file);

    # dbt files are processed at the same time as dbf files
    return 0 if ($fullname =~ /\.dbt$/i);

    # see if this is a foxbase database
    return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);

    my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;

    # open the file
    if (!open (FOXBASEIN, $fullname)) {
	print STDERR "FOXPlug::read - couldn't read $fullname\n";
	return undef;
    }
    
    print STDERR "FOXPlug: processing $file\n";

    # read in the database header
    my ($temp, %dbf);
    
    # read in information about dbt file
    if (read (FOXBASEIN, $temp, 32) < 32) {
	print STDERR "FOXPlug::read - eof while reading database header";
	close (FOXBASEIN);
	return undef;
    }
    
    # unpack the header
    ($dbf{'hasdbt'},
     $dbf{'modyear'}, $dbf{'modmonth'}, $dbf{'modday'},
     $dbf{'numrecords'}, $dbf{'headerlen'}, 
     $dbf{'recordlen'}) = unpack ("CCCCVvv", $temp);
    
    # process hasdbt
    if ($dbf{'hasdbt'} == 131) {
	$dbf{'hasdbt'} = 1;
    } elsif ($dbf{'hasdbt'} == 3) {
	$dbf{'hasdbt'} = 0;
    } else {
	print STDERR "FOXPlug:read $fullname doesn't seem to be a Foxbase file\n";
	return undef;
    }

    # read in the field description
    $dbf{'numfields'} = 0;
    $dbf{'fieldinfo'} = [];
    while (read (FOXBASEIN, $temp, 1) > 0) {
	last if ($temp eq "\x0d");
	last if (read (FOXBASEIN, $temp, 31, 1) < 31);

	my %field = ();
	$field{'name'} = $self->extracttext($temp, 11);
	($field{'type'}, $field{'pos'}, $field{'len'}, $field{'dp'}) 
	    = unpack ("x11a1VCC", $temp);

	push (@{$dbf{'fieldinfo'}}, \%field);

	$dbf{'numfields'} ++;
    }

    # open the dbt file if we need to
    $dbtfullname = $fullname;
    if ($fullname =~ /f$/) {
	$dbtfullname =~ s/f$/t/;
    } else {
	$dbtfullname =~ s/F$/T/;
    }
    if ($dbf{'hasdbt'} && !open (DBTIN, $dbtfullname)) {
	print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
	close (FOXBASEIN);
	return undef;
    }

    # read in and process each record in the database
    my $numrecords = 0;
    while (($numrecords < $dbf{'numrecords'}) && 
	   (read (FOXBASEIN, $temp, $dbf{'recordlen'}) == $dbf{'recordlen'})) {

	# create a new record
	my $record = [];
	
	foreach $field (@{$dbf{'fieldinfo'}}) {
	    my $fieldvalue = "";
	    
	    if ($field->{'type'} eq "M" && $dbf{'hasdbt'}) {
		# a memo field, look up this field in the dbt file
		my $seekpos = substr ($temp, $field->{'pos'}, $field->{'len'});

		$seekpos =~ s/^\s*//;
		$seekpos = 0 unless $seekpos =~ /^\d+$/;
		
		$seekpos = $seekpos * 512;

		if ($seekpos == 0) {
		    # there is no memo field

		} elsif (seek (DBTIN, $seekpos, 0)) {
		    while (read (DBTIN, $fieldvalue, 512, length($fieldvalue)) > 0) {
			last if ($fieldvalue =~ /\cZ/);
		    }

		    # remove everything after the control-Z
		    substr($fieldvalue, index($fieldvalue, "\cZ")) = "";

		} else {
		    print STDERR "\nERROR - seek (to $seekpos) failed\n";
		}

	    } else {
		# a normal field
		$fieldvalue = substr ($temp, $field->{'pos'}, $field->{'len'});
	    }

	    push (@$record, {%$field, 'value'=>$fieldvalue});
	}

	# process this record
	$self->process_record ($pluginfo, $base_dir, $file, $metadata, $processor, 
			       $numrecords, $record);
	
	# finished another record...
	$numrecords++;
    }

    # close the dbt file if we need to
    if ($dbf{'hasdbt'}) {
	close (DBTIN);
    }

    # close the dbf file
    close (FOXBASEIN);

    # finished processing
    return 1;
}


# will extract a string from some larger string, making it
# conform to a number of constraints
sub extracttext {
    my $self = shift (@_);
    my ($text, $maxlen, $offset, $stopstr) = @_;
    $offset = 0 unless defined $offset;
    $stopstr = "\x00" unless defined $stopstr;
    
    # decide where the string finishes
    my $end = index ($text, $stopstr, $offset);
    $end = length ($text) if $end < 0;
    $end = $offset+$maxlen if (defined $maxlen) && ($end-$offset > $maxlen);
    
    return "" if ($end <= $offset);
    return substr ($text, $offset, $end-$offset);
}


# process_record should be overriden for a particular type
# of database. This default version outputs an html document
# containing all the fields in the record as a table.
# It also assumes that the text is in utf-8.
sub process_record {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $numrecords, $record) = @_;

    # create a new document
    my $doc_obj = new doc ($file, "indexed_doc");
    my $section = $doc_obj->get_top_section();

    # start of document
    $doc_obj->add_utf8_text($section, "<table>\n");

    # add each field
    foreach $field (@$record) {
	if (defined ($field->{'name'}) && defined ($field->{'value'})) {
	    $doc_obj->add_utf8_text($section, "  <tr>\n");
	    $doc_obj->add_utf8_text($section, "    <td>$field->{'name'}</td>\n");
	    $doc_obj->add_utf8_text($section, "    <td>$field->{'value'}</td>\n");
	    $doc_obj->add_utf8_text($section, "  </tr>\n");
	}
    }

    # end of document
    $doc_obj->add_utf8_text($section, "</table>\n");

    # add an object id
    $doc_obj->set_OID();

    # process the document
    $processor->process($doc_obj);
}


1;