Changeset 6792
- Timestamp:
- 2004-02-12T15:56:37+13:00 (20 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/Kea.pm
r2018 r6792 1 1 package Kea; 2 3 use strict; 2 4 3 5 # This function is called by BasPlug.pm when a flag in a collection … … 21 23 my @optionlist = split(/ +/, $args) if (defined($args)); #list of options 22 24 my $suffix = 'kea'; #default file will be called .kea 23 my $command = "";25 my @kea_options; 24 26 my @keylist; 25 27 my @stemlist; 26 28 27 print STDERR "optionlist: @optionlist\n"; 28 29 foreach $element (@optionlist){ #for each option 29 30 foreach my $element (@optionlist){ #for each option 30 31 my ($option, $file) = split(/,/, $element); #split option letter and file (if file exist) 31 32 32 33 $option = "-".$option; #place dash in front of option 33 $file = "" if(!defined($file)); #no file options specified 34 $suffix = $file if($option eq '-E'); #if option is extension (suffix) option 35 $command .= " $option $file "; #add to list of commands 34 push @kea_options, $option; 35 if (defined($file)) { 36 push @kea_options, $file; 37 } 38 39 if ($option eq '-E') # option is extension (suffix) option 40 { $suffix = $file } 36 41 } 37 42 38 print STDERR "Using output suffix: $suffix\n";43 # print STDERR "Using output suffix: $suffix\n"; 39 44 40 45 # remove all HTML tags 41 $doc =~ s/<P[^>]*>/\n/sgi; 42 $doc =~ s/<H[^>]*>/\n/sgi; 43 $doc =~ s/<[^>]*>//sgi; 44 $doc =~ tr/\n/\n/s; 46 $doc =~ s/<[ph][^>]*>/\n/sgi; # replace headings/paragraphs with newline 47 $doc =~ s/<[^>]*>/ /sgi; # replace all others with a space 45 48 46 #write text to a file eg doc.txt 47 open(OUT, ">$gsdlhome/tmp/doc.txt") or die "In Kea.pm doc.txt could not be created\n"; 49 # > lt amp 50 $doc =~ s/\&(?:gt|lt|amp)\;/ /gi; 51 52 my $tmpfile="$gsdlhome/tmp/doc.txt"; 53 open(OUT, ">$tmpfile") or die "Kea.pm could not create doc.txt: $!\n"; 48 54 print OUT $doc; 49 55 close(OUT); 50 56 51 #call Kea with specifed options 52 `$gsdlhome/perllib/Kea-1.1.4/Kea $command $gsdlhome/tmp/doc.txt`; 57 # call Kea with specifed options 58 system("$gsdlhome/perllib/Kea-1.1.4/Kea", @kea_options, 59 $tmpfile); 53 60 54 #read doc.kea with keywords 55 open(IN, "<$gsdlhome/tmp/doc.$suffix") or return @emptykeylist; 56 #this means doc.kea does not exist 57 #either because an option was wrongly specified 58 #or no keyphrases were found 61 unlink($tmpfile); # don't need this file anymore 62 63 # read doc.kea with keywords 64 my $inputfile="$gsdlhome/tmp/doc.$suffix"; 65 66 # If this file doesn't exist, then either an option was wrongly specified 67 # or no keyphrases were found 68 open(IN, "<$inputfile") or return (); 69 59 70 while(<IN>){ 60 71 chomp; 61 @key = split(/\t/); #split into array separated by a tab72 my @key = split(/\t/); #split into array separated by a tab 62 73 push(@keylist, $key[0]); #add to list of keywords 63 74 push(@stemlist, $key[1]); #add to list of stems … … 66 77 67 78 #put data into appropriate format 68 $keylist= join(", ", @keylist);69 $stemlist= join(", ", @stemlist);79 my $keylistref = join(", ", @keylist); 80 my $stemlistref = join(", ", @stemlist); 70 81 71 # delete doc.extension so that in future it will not be opened and read72 `rm $gsdlhome/tmp/doc.$suffix`;82 # delete doc.extension so that in future it will not be opened and read 83 unlink($inputfile); 73 84 74 #return keywords + stems to basplug 75 my @keystemlist = ($keylist, $stemlist); 76 return @keystemlist; 77 85 # return keywords + stems to basplug 86 return ($keylistref, $stemlistref); 78 87 } 79 88
Note:
See TracChangeset
for help on using the changeset viewer.