Changeset 11070


Ignore:
Timestamp:
2006-01-19T16:48:58+13:00 (18 years ago)
Author:
mdewsnip
Message:

A much tidier Kea.pm that now also works on Windows.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/Kea.pm

    r11069 r11070  
    11package Kea;
     2
     3use strict;
    24
    35# This function is called by BasPlug.pm when a flag in a collection
     
    2123
    2224# returns a string containing comma-separated keyphrases
    23 sub extract_KeyPhrases {
     25sub extract_KeyPhrases
     26{
     27    my $kea_version = shift(@_);
     28    my $doc = shift(@_);  # Document's text 
     29    my $args = shift(@_);  # Options
    2430
    25     # Parsing arguments of the function
    26     my $kea_version = shift(@_);
    27     my $doc = shift(@_); # documents text 
    28     my $args = shift(@_); # any options
    29     my @optionlist = split(/\s+/, $args) if (defined($args)); #list of options
    30 
    31     # Specifying directory names
    32     my $keahome = &get_Kea_directory($kea_version);
    33     my $defaultmodel = &util::filename_cat($keahome, "CSTR-20");
     31    # Set default models
     32    my $kea_home = &get_Kea_directory($kea_version);
     33    my $default_model_path = &util::filename_cat($kea_home, "CSTR-20");
    3434    if ($kea_version eq "4.0") {
    35     # Use a different model for Kea 4.0
    36     $defaultmodel = &util::filename_cat($keahome, "FAO-20docs");
     35    # Use a different default model for Kea 4.0
     36    $default_model_path = &util::filename_cat($kea_home, "FAO-20docs");
    3737    }
    3838
    39     # Initializing variables:
    40     my $command = "";
    41     my @keylist;
    42     my @options = ();
    43     $modelspec = 0;
    44 
    45     # Settings for the java executable:
    46 
    47     # CLASSPATH:
    48     $java_classpath = ".:$keahome";
    49 
    50     # See if java executable is on path
    51     my $java_exec="";
    52     if (system("which java >/dev/null 2>/dev/null")==0) {
    53     $java_exec=`which java`;
    54     chomp $java_exec;
    55     } else {
    56     $java_exec="$java_home/bin/java";
    57     }
    58    
    59     # The actual java command is based on these other variables:
    60     $java_command = "$java_exec -classpath \"$java_classpath\"";
    61    
    62     # end of java settings
    63 
    64     # Parsing options for keyphrase extraction:
    65     if (@optionlist) {
    66     foreach $element (@optionlist){ #for each option
    67         if (length($element) == 1) {
    68         push(@options, "-$element");
    69         } else {
    70         $option = substr($element, 0, 1);
    71         $value = substr($element,1);
    72         if (($option eq "m") && (-e "$keahome/$value")) {
    73             $modelspec = 1;
    74             push(@options, "-$option $keahome/$value");
    75         } elsif ($option eq "m") {         
    76             $modelspec = 1;
    77             print STDERR "Couldn't find model $value. Using the default model instead\n";
    78             push(@options, "-$option $defaultmodel");
    79         } else {
    80             push(@options, "-$option $value");
     39    # Parse the Kea options
     40    my $options_string;
     41    my @args_list = split(/\s+/, $args) if (defined($args));
     42    if (@args_list) {
     43    my $model_specified = 0;
     44    foreach my $arg (@args_list) {
     45        if (length($arg) == 1) {
     46        $options_string .= " -$arg";
     47        }
     48        else {
     49        my $option = substr($arg, 0, 1);
     50        my $value = substr($arg, 1);
     51        if ($option eq "m") {
     52            my $model_path = &util::filename_cat($kea_home, $value);
     53            if (-e $model_path) {
     54            $options_string .= " -m $model_path";
     55            }
     56            else {
     57            print STDERR "Warning: Couldn't find model $model_path; using the default model instead.\n";
     58            $options_string .= " -m $default_model_path";
     59            }
     60            $model_specified = 1;
    8161        }
    82        
     62        else {
     63            $options_string .= " -$option $value";
     64        }
    8365        }
    8466    }
    85     # if none of the option specifies the model, set the default one:
    86     if ($modelspec != 1) {
    87         push(@options, "-m $defaultmodel");
     67
     68    # If none of the options specifies a model, use the default one
     69    if ($model_specified != 1) {
     70        $options_string .= " -m $default_model_path";
    8871    }
    89     $options = join(" ",@options);
    90     # print STDERR "OPTIONS: $options\n";
    91     } else {
    92     # If no options were specified: Set default value for the model
    93     $options = "-m $defaultmodel"; 
     72    }
     73    else {
     74    # If no options were specified, use the default model
     75    $options_string = "-m $default_model_path";
    9476    }
    9577
     
    10082    $doc =~ tr/\n/\n/s;
    10183
    102     # Write text to a temporary file doc.txt
    103     my $gsdlhome = $ENV{'GSDLHOME'};
    104     open(OUT, ">$gsdlhome/tmp/doc.txt") or die "In Kea.pm doc.txt could not be created\n"; 
    105     print OUT $doc;
    106     close(OUT);
     84    # Write text to a temporary file doc.txt
     85    my $tmp_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
     86    my $doc_txt_file_path = &util::filename_cat($tmp_directory_path, "doc.txt");
     87    open(DOC_TXT, ">$doc_txt_file_path") or die "Error: Could not write $doc_txt_file_path in Kea.pm.\n"; 
     88    print DOC_TXT $doc;
     89    close(DOC_TXT);
    10790
     91    # Run Kea with the specified options
     92    system("java -classpath \"$kea_home\" KEAKeyphraseExtractor -l $tmp_directory_path $options_string");
    10893
    109     # EXECUTE KEA with specific options:
    110     $command = "$java_command KEAKeyphraseExtractor -l $gsdlhome/tmp $options";
    111     system ("$command");
     94    # Read the resulting doc.key file which contains the keyphrases
     95    my $doc_key_file_path = &util::filename_cat($tmp_directory_path, "doc.key");
     96    if (!open(IN, "<$doc_key_file_path")) {
     97    # The doc.key file does not exist (either an option was wrongly specified, or no keyphrases were found)
     98    return "";
     99    }
    112100
    113     # Read the resulting doc.key, which contains keyphrases:
    114 
    115     open(IN, "<$gsdlhome/tmp/doc.key") or return "";
    116                                           #this means doc.key does not exist
    117                                           #either because an option was wrongly specified
    118                                       #or no keyphrases were found
    119     while(<IN>){
     101    my @keyphrase_list = ();
     102    while (<IN>) {
    120103    chomp;
    121     push(@keylist,$_);
     104    push(@keyphrase_list, $_);
    122105    }
    123106    close(IN);
    124107
    125     $keylist = join(", ", @keylist);
     108    # Delete doc.key so that in future it will not be opened and read (otherwise KEA sees it as more keyphrases!)
     109    unlink($doc_key_file_path);
    126110
    127     # Delete doc.key so that in future it will not be opened and read.
    128     # Otherwise KEA sees it as more keyphrases!
    129 
    130     unlink("$gsdlhome/tmp/doc.key");
    131    
    132     return $keylist;
     111    my $keyphrases = join(", ", @keyphrase_list);
     112    return $keyphrases;
    133113}
    134114
Note: See TracChangeset for help on using the changeset viewer.