Changeset 6792


Ignore:
Timestamp:
2004-02-12T15:56:37+13:00 (20 years ago)
Author:
jrm21
Message:

tidied up code quite a bit, remove tmp files after use.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/Kea.pm

    r2018 r6792  
    11package Kea;
     2
     3use strict;
    24
    35# This function is called by BasPlug.pm when a flag in a collection
     
    2123    my @optionlist = split(/ +/, $args) if (defined($args)); #list of options
    2224    my $suffix = 'kea'; #default file will be called .kea
    23     my $command = "";
     25    my @kea_options;
    2426    my @keylist;
    2527    my @stemlist;
    2628
    27     print STDERR "optionlist: @optionlist\n";
    28        
    29     foreach $element (@optionlist){ #for each option
     29
     30    foreach my $element (@optionlist){ #for each option
    3031    my ($option, $file) = split(/,/, $element); #split option letter and file (if file exist)
    3132
    3233    $option  = "-".$option; #place dash in front of option
    33     $file = "" if(!defined($file)); #no file options specified
    34     $suffix = $file if($option eq '-E'); #if option is extension (suffix) option
    35     $command .= " $option $file "; #add to list of commands
     34    push @kea_options, $option;
     35    if (defined($file)) {
     36        push @kea_options, $file;
     37    }
     38
     39    if ($option eq '-E')  # option is extension (suffix) option
     40        { $suffix = $file }
    3641    }
    3742
    38     print STDERR "Using output suffix: $suffix\n";
     43    # print STDERR "Using output suffix: $suffix\n";
    3944
    4045    # remove all HTML tags
    41     $doc =~ s/<P[^>]*>/\n/sgi;
    42     $doc =~ s/<H[^>]*>/\n/sgi;
    43     $doc =~ s/<[^>]*>//sgi;
    44     $doc =~ tr/\n/\n/s;
     46    $doc =~ s/<[ph][^>]*>/\n/sgi; # replace headings/paragraphs with newline
     47    $doc =~ s/<[^>]*>/ /sgi; # replace all others with a space
    4548
    46     #write text to a file eg doc.txt
    47     open(OUT, ">$gsdlhome/tmp/doc.txt") or die "In Kea.pm doc.txt could not be created\n"; 
     49    # &gt; lt amp
     50    $doc =~ s/\&(?:gt|lt|amp)\;/ /gi;
     51
     52    my $tmpfile="$gsdlhome/tmp/doc.txt";
     53    open(OUT, ">$tmpfile") or die "Kea.pm could not create doc.txt: $!\n"; 
    4854    print OUT $doc;
    4955    close(OUT);
    5056
    51     #call Kea with specifed options
    52     `$gsdlhome/perllib/Kea-1.1.4/Kea $command $gsdlhome/tmp/doc.txt`;
     57    # call Kea with specifed options
     58    system("$gsdlhome/perllib/Kea-1.1.4/Kea", @kea_options,
     59       $tmpfile);
    5360
    54     #read doc.kea with keywords
    55     open(IN, "<$gsdlhome/tmp/doc.$suffix") or return @emptykeylist;
    56                                           #this means doc.kea does not exist
    57                                           #either because an option was wrongly specified
    58                                       #or no keyphrases were found
     61    unlink($tmpfile); # don't need this file anymore
     62
     63    # read doc.kea with keywords
     64    my $inputfile="$gsdlhome/tmp/doc.$suffix";
     65
     66    # If this file doesn't exist, then either an option was wrongly specified
     67    # or no keyphrases were found
     68    open(IN, "<$inputfile") or return ();
     69
    5970    while(<IN>){
    6071    chomp;
    61     @key = split(/\t/); #split into array separated by a tab
     72    my @key = split(/\t/); #split into array separated by a tab
    6273    push(@keylist, $key[0]); #add to list of keywords
    6374    push(@stemlist, $key[1]); #add to list of stems 
     
    6677
    6778    #put data into appropriate format
    68     $keylist = join(", ", @keylist);
    69     $stemlist = join(", ", @stemlist); 
     79    my $keylistref = join(", ", @keylist);
     80    my $stemlistref = join(", ", @stemlist);   
    7081 
    71     #delete doc.extension so that in future it will not be opened and read
    72     `rm $gsdlhome/tmp/doc.$suffix`;
     82    # delete doc.extension so that in future it will not be opened and read
     83    unlink($inputfile);
    7384
    74     #return keywords + stems to basplug
    75     my @keystemlist = ($keylist, $stemlist);
    76     return @keystemlist;
    77 
     85    # return keywords + stems to basplug
     86    return ($keylistref, $stemlistref);
    7887}
    7988
Note: See TracChangeset for help on using the changeset viewer.