package Kea; use BasPlug; #This function is called by BasPlug.pm when a flag in a collection configuration #document specifies that keyphrase metadata must be gathered for that collection. #Its arguments are the document's text and, optionally, some options for how #the keyphrase data is to be collected (given if the keyphrase option flag was set in #the collection configuration file). This module then writes the document's text #to a file, because the stand-alone program Kea, which is called to do the #actual extraction of the keyphrases, expects a file argument. Once Kea has been #run, the file containing the keyphrase data gathered by Kea should be #stored in gsdl/tmp; this file is read, and the data we are interested in is extracted #and passed back to BasPlug.pm in an appropriate format. sub extract_KeyPhrases { my $gsdlhome = $ENV{'GSDLHOME'}; my $doc = shift(@_); #documents text my $args = shift(@_); #any options my @optionlist = split(/ +/, $args) if (defined($args)); #list of options my $suffix = 'kea'; #default file will be called .kea my $command = ""; my @keylist; my @stemlist; print STDERR "optionlist: @optionlist\n"; foreach $element (@optionlist){ #for each option my ($option, $file) = split(/,/, $element); #split option letter and file (if file exist) $option = "-".$option; #place dash in front of option $file = "" if(!defined($file)); #no file options specified $suffix = $file if($option eq '-E'); #if option is extension (suffix) option $command .= " $option $file "; #add to list of commands } print STDERR "Using output suffix: $suffix\n"; # remove all HTML tags $doc =~ s/
]*>/\n/sgi;
$doc =~ s/