###########################################################################
#
# acronym.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# class to hold acronyms

use strict;
#use diagnostics;

package acronym;
#use Class::Struct;


sub new {
    my $self = [
	"", # 0 acronym
	[], # 1 definition
	[], # 2 stop_words
	0,  # 3 letters_for_far
	
    ];
    bless $self;
}

#struct (
#	# core items
#	acronym => '$',     # the acronym (a string)
#	definition => '@',  # the acronyms defintion (an array of strings)
#        stop_words => '@',  # an array of 1 (stop word) and 0 (non stop word)
#
#        # items related to the context in which the acronym was mined
#        word_count => '$',        # the index of the acronym within the text
#	definition_offset => '$', # the distance between the acronym and the definition.
#        
#        #temporary items used during the mining of the acronym
#        letters_for_far => '$', # how many letters have we found so far ?
#
#	#calculated items
#	#...
#
#	);

#sub definition {
#    my $self = shift (@_);
#    my @def = @$self->[2];
#    print "definition::\@de+f = " . @def . "\n";
#    print "definition::\@_ = " . @_ . "\n";
#    if (@_) {
#	push  @def, @_;
#	@$self->[2] = @def;
#    }
#    return @def;
#}


sub clone  {
    my $self = shift (@_);

    my $copy = new acronym;

    $copy->[0] = $self->[0];
    push @{$copy->[1]}, @{$self->[1]};
    push @{$copy->[2]}, @{$self->[2]};
    $copy->[3] = $self->[3];
    $copy->[4] = $self->[4];
    $copy->[5] = $self->[5];
    $copy->[6] = $self->[6];
    
    return $copy;
}

#sub initialise {
#    my $self = shift (@_);
#
#    # initialise the struct from the parameters ...
#    my($acro, $wc, $def) = @_;
#    $self->acronym($acro);
#    $self->word_count($wc);
#    $self->definition_offset($def);
#
#    $self->letters_for_far(0);
#}

#print out the kwic for the acronym
sub to_string_kwic {
    my $self = shift (@_);

    # the list of all possible combinations
    my @list = ();

    my $result = "";

    my $j = 0;
    my @array = @{$self->[1]};
    while ($j <= $#array)
    {

	$result = "";
	
	# do the definition
	my $i = 0;
	while ($i <= $#array)
	{
	    my $current = ($i + $j) % ($#array+1);
	    $result = $result . $array[$current] . " ";
	    $i++;
	}
	$result = $result . "(" . $self->[0] . ")";

	push @list, $result;
	$j++;
    }
    return @list;
}

#this is the one used when building the collection ...
sub to_string {
    my $self = shift (@_);

    my $result = $self->[0] . " ";

    # do the definition
    my @array = @{$self->[1]};
    my $i = 0;
    while ($i <= $#array)
    {
	my $resultnext = $result . $array[$i] . " ";
	$result = $resultnext;
	$i++;
    }
    return $result;
}

# called when the acronym is complete and after altering any stats to compute stats etc.
sub stablise {


}

sub acronyms {
    my $processed_text =  shift @_;
    $$processed_text =~ s/[^A-Za-z]/ /g;
    $$processed_text =~ s/\s+/ /g;

    return &acronyms_from_clean_text($processed_text)
}

sub acronyms_from_clean_text {
    my ($processed_text) = @_;
    my @acro_list = ();

    my @text = split / /, $$processed_text;

#    my $i = 0;
#    while ($i <= $#text)
#    {
#	print $text->[$i] . "\n";
#	$i++;
#    }

    my $first = 0;
    my $last = $#text +1;
    my $acro_counter = $first;
    
    while ($acro_counter < $last)
    {
	my $word = $text[$acro_counter];

	# the tests on the following line are VERY important to the performance of this algorithm
	# be VERY careful when relaxing them...
	if (length $word >= 3 && (uc($word) eq $word))
	{
	    my $def_counter = 0;
	    while ($def_counter <= $last)
	    {
		my $letter_counter = 0;
		my $match = 1; 
		while ($letter_counter < length($word))
		{
		    if ($def_counter+$letter_counter >= $last)
		    {
			$match = 0;
			last;
		    }
		    my $def_word = $text[$def_counter+$letter_counter];

		    #throw it out if it's recursing...
		    if (uc($word) eq uc($def_word))
		    {
			$match = 0;
			last
		    }
		    if (substr($word, $letter_counter, 1) ne substr($def_word, 0, 1))
		    {
			$match = 0;
			last;
		    }
		    $letter_counter++;
		}
		# this line should perhaps be more sophisticated ... it encodes what we consider 
                # to be a valid acronym 
		if ($match == 1 && $letter_counter > 0 && (abs($def_counter - $acro_counter)< 50))
		{
		    my $acro = new acronym();
		    $acro->[0] = $word;
		    push @{$acro->[1]}, @text[$def_counter .. $def_counter + $letter_counter - 1 ];
		    $acro->[3] = $letter_counter;
#		    my @tmp = ( $acro );
		    push @acro_list, ( $acro );
#		    print $acro->to_string(). "\n";
		    $match = 0;
		}
		$def_counter++;
	    }
	}
	$acro_counter++;
    }

    return \@acro_list;
}


sub test {

#    my $blarg = new acronym();
#    my $simple;
#    $simple = 10;
#    $blarg->initialise($simple, $simple, $simple);
#    my $blarg2 = $blarg->clone();
#    print $blarg->to_string();
#    print $blarg2;
#    print "\n";
#    
    my $tla = new acronym();
    $tla->[0] = "TLA";

    my @array = ("Three", "Letter", "Acronym");
#    my $i = 0;
#    while ($i <= $#array)
#    {
#	print @array[$i] . "\n";
#	$i++;
#    }

    print "\n";
    push @{$tla->[1]}, ("Three" );
    push @{$tla->[1]}, ("Letter" );
    push @{$tla->[1]}, ("Letter" );
    push @{$tla->[1]}, ("Acronym" );
    print $tla->to_string(). "\n";
    print "\n";
    print "\n";
    my $tla2 = $tla->clone();
    push @{$tla2->[1]}, ("One");
    push @{$tla2->[1]}, ("Two");
    $tla2->[0] = "ALT";
    print $tla->to_string(). "\n";
    print $tla2->to_string(). "\n";

    print "\n";
    print "\n";

}

#uncomment this line to test this package
#&test();

1;