###########################################################################
#
# TEXTPlug.pm -- simple text plugin
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

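# Creates a simple single-level document from .txt or .text files,
# optionally gzipped (.txt.gz / .text.gz); the filename match is
# case-insensitive. Unless Title metadata is passed in, adds Title
# metadata taken from the first line longer than 5 characters,
# truncated to 100 characters.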

package TEXTPlug;

use BasPlug;
use sorttools;

# Runs at compile time and sets up inheritance: this package's @ISA
# names BasPlug as its base class.
sub BEGIN {
    @ISA = qw(BasPlug);
}

# Constructor: builds a BasPlug object and re-blesses it into $class.
#   $class - name of the class to bless the new object into
# Returns the blessed object.
sub new {
    my ($class) = @_;

    # Keep the object in a lexical; without "my" every call would
    # overwrite a package-global $self. The arrow form is used instead
    # of "new BasPlug ()" so the call does not depend on the parser's
    # indirect-object guessing.
    my $self = BasPlug->new();

    return bless $self, $class;
}

# Reports whether this plugin is recursive. Always returns 0.
sub is_recursive {
    my ($self) = @_;

    # this is not a recursive plugin
    return 0;
}


# return number of files processed, undef if can't process
# Note that $base_dir might be "" and that $file might 
# include directories
sub read {
    # Turns one plain-text file into a single-section document and hands
    # it to $processor->process().
    #
    #   $pluginfo  - not used by this plugin
    #   $base_dir  - directory prefix, may be ""
    #   $file      - file name, may include directories
    #   $metadata  - hash ref of metadata to attach to the document; each
    #                value is either a single value or an array ref of values
    #   $processor - object whose process() method receives the document;
    #                a true $processor->{'verbosity'} enables the progress
    #                message on STDERR
    #
    # Returns 1 once the file has been processed, undef if the name does
    # not end in .txt/.text (optionally followed by .gz) or the file does
    # not exist. Dies if the file (or zcat) cannot be opened.
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    my $filename = &util::filename_cat($base_dir, $file);

    # the single capture group is only set for a trailing ".gz"
    return undef unless ($filename =~ /\.te?xt(\.gz)?$/i);
    my $gz = defined ($1) ? 1 : 0;
    return undef unless (-e $filename);

    print STDERR "TEXTPlug: processing $filename\n" if $processor->{'verbosity'};

    # create a new document
    my $doc_obj = doc->new ($file, "indexed_doc");

    my $fh;
    if ($gz) {
        # List-form pipe open: the file name is passed to zcat as a single
        # argument and never goes through a shell, so spaces and shell
        # metacharacters in the name are harmless.
        open ($fh, '-|', 'zcat', $filename)
            or die "TEXTPlug::read - zcat can't open $filename\n";
    } else {
        # Three-argument open: the name is never interpreted as a mode.
        open ($fh, '<', $filename)
            or die "TEXTPlug::read - can't open $filename\n";
    }
    my $cursection = $doc_obj->get_top_section();

    my $text = "";
    # don't need to get title if it has been passed
    # in from another plugin
    my $foundtitle = defined ($metadata->{'Title'}) ? 1 : 0;

    while (defined (my $line = <$fh>)) {
        # use the first line longer than 5 characters as title; substr
        # returns the whole line when it is shorter than 100 characters
        if (!$foundtitle && length ($line) > 5) {
            $doc_obj->add_metadata ($cursection, "Title", substr ($line, 0, 100));
            $foundtitle = 1;
        }
        $text .= $line;
    }

    # Release the handle. For the pipe this also waits for zcat, and a
    # false return means zcat exited with an error (e.g. corrupt input).
    if (!close ($fh) && $gz) {
        print STDERR "TEXTPlug: warning - zcat failed while reading $filename\n";
    }

    $doc_obj->add_text ($cursection, "<pre>\n$text\n</pre>");

    foreach my $field (keys (%$metadata)) {
        my $value = $metadata->{$field};
        # $metadata->{$field} may be an array reference
        my @values = (ref ($value) eq "ARRAY") ? @$value : ($value);
        foreach my $item (@values) {
            $doc_obj->add_metadata ($cursection, $field, $item);
        }
    }

    # add OID
    $doc_obj->set_OID ();

    # process the document
    $processor->process ($doc_obj);

    return 1; # processed the file
}

1;